diff --git "a/checkpoint-226236/trainer_state.json" "b/checkpoint-226236/trainer_state.json" deleted file mode 100644--- "a/checkpoint-226236/trainer_state.json" +++ /dev/null @@ -1,158410 +0,0 @@ -{ - "best_metric": 0.5896387696266174, - "best_model_checkpoint": "LanguageTutor_v1/core/models/models/modernbert_output/checkpoint-226236", - "epoch": 2.0, - "eval_steps": 500, - "global_step": 226236, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 8.840326031224031e-05, - "grad_norm": 12.360391616821289, - "learning_rate": 4.999852661232813e-05, - "loss": 1.8933, - "step": 10 - }, - { - "epoch": 0.00017680652062448062, - "grad_norm": 8.394221305847168, - "learning_rate": 4.999705322465626e-05, - "loss": 1.5802, - "step": 20 - }, - { - "epoch": 0.00026520978093672097, - "grad_norm": 5.720775604248047, - "learning_rate": 4.9995579836984394e-05, - "loss": 1.564, - "step": 30 - }, - { - "epoch": 0.00035361304124896124, - "grad_norm": 4.403193473815918, - "learning_rate": 4.999410644931252e-05, - "loss": 1.5793, - "step": 40 - }, - { - "epoch": 0.00044201630156120157, - "grad_norm": 7.755221843719482, - "learning_rate": 4.999263306164065e-05, - "loss": 1.6448, - "step": 50 - }, - { - "epoch": 0.0005304195618734419, - "grad_norm": 5.395235538482666, - "learning_rate": 4.999115967396878e-05, - "loss": 1.5412, - "step": 60 - }, - { - "epoch": 0.0006188228221856822, - "grad_norm": 6.468478679656982, - "learning_rate": 4.998968628629691e-05, - "loss": 1.5383, - "step": 70 - }, - { - "epoch": 0.0007072260824979225, - "grad_norm": 8.549371719360352, - "learning_rate": 4.9988212898625036e-05, - "loss": 1.5251, - "step": 80 - }, - { - "epoch": 0.0007956293428101628, - "grad_norm": 5.457655429840088, - "learning_rate": 4.998673951095317e-05, - "loss": 1.4951, - "step": 90 - }, - { - "epoch": 0.0008840326031224031, - "grad_norm": 8.569847106933594, - "learning_rate": 4.998526612328129e-05, - "loss": 1.4208, - "step": 100 - }, - { - "epoch": 0.0009724358634346435, - "grad_norm": 4.5735297203063965, - "learning_rate": 4.998379273560943e-05, - "loss": 1.4895, - "step": 110 - }, - { - "epoch": 0.0010608391237468839, - "grad_norm": 4.128511905670166, - "learning_rate": 4.9982319347937556e-05, - "loss": 1.4945, - "step": 120 - }, - { - "epoch": 0.001149242384059124, - "grad_norm": 3.3097379207611084, - "learning_rate": 4.9980845960265685e-05, - "loss": 1.606, - "step": 130 - }, - { - "epoch": 0.0012376456443713643, - "grad_norm": 2.2360458374023438, - "learning_rate": 4.997937257259381e-05, - "loss": 1.5863, - "step": 140 - }, - { - "epoch": 0.0013260489046836047, - "grad_norm": 5.046316623687744, - "learning_rate": 4.997789918492195e-05, - "loss": 1.5055, - "step": 150 - }, - { - "epoch": 0.001414452164995845, - "grad_norm": 4.0020036697387695, - "learning_rate": 4.997642579725007e-05, - "loss": 1.4883, - "step": 160 - }, - { - "epoch": 0.0015028554253080854, - "grad_norm": 2.5163402557373047, - "learning_rate": 4.9974952409578205e-05, - "loss": 1.3965, - "step": 170 - }, - { - "epoch": 0.0015912586856203256, - "grad_norm": 5.472707748413086, - "learning_rate": 4.9973479021906326e-05, - "loss": 1.4061, - "step": 180 - }, - { - "epoch": 0.001679661945932566, - "grad_norm": 5.013034820556641, - "learning_rate": 4.997200563423446e-05, - "loss": 1.518, - "step": 190 - }, - { - "epoch": 0.0017680652062448063, - "grad_norm": 3.9262633323669434, - "learning_rate": 4.997053224656259e-05, - "loss": 1.435, - "step": 200 - }, - { - "epoch": 0.0018564684665570467, - "grad_norm": 4.9150614738464355, - "learning_rate": 4.996905885889072e-05, - "loss": 1.4966, - "step": 210 - }, - { - "epoch": 0.001944871726869287, - "grad_norm": 4.164141654968262, - "learning_rate": 4.9967585471218847e-05, - "loss": 1.4522, - "step": 220 - }, - { - "epoch": 0.0020332749871815273, - "grad_norm": 4.787590503692627, - "learning_rate": 4.996611208354698e-05, - "loss": 1.4752, - "step": 230 - }, - { - "epoch": 0.0021216782474937678, - "grad_norm": 8.909902572631836, - "learning_rate": 4.99646386958751e-05, - "loss": 1.418, - "step": 240 - }, - { - "epoch": 0.0022100815078060078, - "grad_norm": 8.003477096557617, - "learning_rate": 4.996316530820324e-05, - "loss": 1.3951, - "step": 250 - }, - { - "epoch": 0.002298484768118248, - "grad_norm": 4.158975601196289, - "learning_rate": 4.996169192053137e-05, - "loss": 1.4096, - "step": 260 - }, - { - "epoch": 0.0023868880284304886, - "grad_norm": 3.142237663269043, - "learning_rate": 4.9960218532859495e-05, - "loss": 1.3951, - "step": 270 - }, - { - "epoch": 0.0024752912887427286, - "grad_norm": 3.579383611679077, - "learning_rate": 4.9958745145187623e-05, - "loss": 1.3693, - "step": 280 - }, - { - "epoch": 0.002563694549054969, - "grad_norm": 12.148061752319336, - "learning_rate": 4.995727175751576e-05, - "loss": 1.4162, - "step": 290 - }, - { - "epoch": 0.0026520978093672095, - "grad_norm": 2.9150171279907227, - "learning_rate": 4.995579836984388e-05, - "loss": 1.4773, - "step": 300 - }, - { - "epoch": 0.00274050106967945, - "grad_norm": 3.3658225536346436, - "learning_rate": 4.9954324982172015e-05, - "loss": 1.4936, - "step": 310 - }, - { - "epoch": 0.00282890432999169, - "grad_norm": 5.113858699798584, - "learning_rate": 4.995285159450014e-05, - "loss": 1.3727, - "step": 320 - }, - { - "epoch": 0.0029173075903039304, - "grad_norm": 4.562338829040527, - "learning_rate": 4.995137820682827e-05, - "loss": 1.4645, - "step": 330 - }, - { - "epoch": 0.003005710850616171, - "grad_norm": 9.740865707397461, - "learning_rate": 4.99499048191564e-05, - "loss": 1.4879, - "step": 340 - }, - { - "epoch": 0.0030941141109284112, - "grad_norm": 4.165475368499756, - "learning_rate": 4.994843143148453e-05, - "loss": 1.4096, - "step": 350 - }, - { - "epoch": 0.0031825173712406512, - "grad_norm": 7.811041831970215, - "learning_rate": 4.994695804381266e-05, - "loss": 1.3699, - "step": 360 - }, - { - "epoch": 0.0032709206315528917, - "grad_norm": 4.133144855499268, - "learning_rate": 4.994548465614079e-05, - "loss": 1.3714, - "step": 370 - }, - { - "epoch": 0.003359323891865132, - "grad_norm": 2.9689953327178955, - "learning_rate": 4.9944011268468914e-05, - "loss": 1.3125, - "step": 380 - }, - { - "epoch": 0.003447727152177372, - "grad_norm": 3.44071102142334, - "learning_rate": 4.994253788079705e-05, - "loss": 1.4217, - "step": 390 - }, - { - "epoch": 0.0035361304124896125, - "grad_norm": 5.091910362243652, - "learning_rate": 4.994106449312518e-05, - "loss": 1.3862, - "step": 400 - }, - { - "epoch": 0.003624533672801853, - "grad_norm": 5.2655930519104, - "learning_rate": 4.9939591105453306e-05, - "loss": 1.4303, - "step": 410 - }, - { - "epoch": 0.0037129369331140934, - "grad_norm": 5.5515666007995605, - "learning_rate": 4.9938117717781434e-05, - "loss": 1.3853, - "step": 420 - }, - { - "epoch": 0.0038013401934263334, - "grad_norm": 5.711978435516357, - "learning_rate": 4.993664433010956e-05, - "loss": 1.3823, - "step": 430 - }, - { - "epoch": 0.003889743453738574, - "grad_norm": 3.29811692237854, - "learning_rate": 4.993517094243769e-05, - "loss": 1.4029, - "step": 440 - }, - { - "epoch": 0.003978146714050814, - "grad_norm": 4.7858405113220215, - "learning_rate": 4.9933697554765826e-05, - "loss": 1.345, - "step": 450 - }, - { - "epoch": 0.004066549974363055, - "grad_norm": 3.114687919616699, - "learning_rate": 4.993222416709395e-05, - "loss": 1.3509, - "step": 460 - }, - { - "epoch": 0.004154953234675295, - "grad_norm": 4.7597808837890625, - "learning_rate": 4.993075077942208e-05, - "loss": 1.426, - "step": 470 - }, - { - "epoch": 0.0042433564949875355, - "grad_norm": 4.4710469245910645, - "learning_rate": 4.992927739175021e-05, - "loss": 1.3886, - "step": 480 - }, - { - "epoch": 0.004331759755299775, - "grad_norm": 7.869180679321289, - "learning_rate": 4.992780400407834e-05, - "loss": 1.516, - "step": 490 - }, - { - "epoch": 0.0044201630156120155, - "grad_norm": 4.000175476074219, - "learning_rate": 4.992633061640647e-05, - "loss": 1.4539, - "step": 500 - }, - { - "epoch": 0.004508566275924256, - "grad_norm": 3.8426191806793213, - "learning_rate": 4.99248572287346e-05, - "loss": 1.4135, - "step": 510 - }, - { - "epoch": 0.004596969536236496, - "grad_norm": 4.768189430236816, - "learning_rate": 4.9923383841062724e-05, - "loss": 1.304, - "step": 520 - }, - { - "epoch": 0.004685372796548737, - "grad_norm": 6.300947666168213, - "learning_rate": 4.992191045339086e-05, - "loss": 1.3956, - "step": 530 - }, - { - "epoch": 0.004773776056860977, - "grad_norm": 5.551243782043457, - "learning_rate": 4.992043706571898e-05, - "loss": 1.3915, - "step": 540 - }, - { - "epoch": 0.004862179317173218, - "grad_norm": 3.9047024250030518, - "learning_rate": 4.9918963678047116e-05, - "loss": 1.3947, - "step": 550 - }, - { - "epoch": 0.004950582577485457, - "grad_norm": 6.883234977722168, - "learning_rate": 4.9917490290375244e-05, - "loss": 1.3875, - "step": 560 - }, - { - "epoch": 0.005038985837797698, - "grad_norm": 5.5822319984436035, - "learning_rate": 4.991601690270337e-05, - "loss": 1.3769, - "step": 570 - }, - { - "epoch": 0.005127389098109938, - "grad_norm": 4.9963297843933105, - "learning_rate": 4.99145435150315e-05, - "loss": 1.3642, - "step": 580 - }, - { - "epoch": 0.005215792358422179, - "grad_norm": 6.4286112785339355, - "learning_rate": 4.9913070127359636e-05, - "loss": 1.4052, - "step": 590 - }, - { - "epoch": 0.005304195618734419, - "grad_norm": 3.423480272293091, - "learning_rate": 4.991159673968776e-05, - "loss": 1.3347, - "step": 600 - }, - { - "epoch": 0.005392598879046659, - "grad_norm": 4.48936128616333, - "learning_rate": 4.991012335201589e-05, - "loss": 1.3606, - "step": 610 - }, - { - "epoch": 0.0054810021393589, - "grad_norm": 3.273667097091675, - "learning_rate": 4.990864996434402e-05, - "loss": 1.4611, - "step": 620 - }, - { - "epoch": 0.00556940539967114, - "grad_norm": 3.781583547592163, - "learning_rate": 4.990717657667215e-05, - "loss": 1.2665, - "step": 630 - }, - { - "epoch": 0.00565780865998338, - "grad_norm": 4.62367582321167, - "learning_rate": 4.990570318900028e-05, - "loss": 1.3098, - "step": 640 - }, - { - "epoch": 0.00574621192029562, - "grad_norm": 3.1934027671813965, - "learning_rate": 4.9904229801328406e-05, - "loss": 1.328, - "step": 650 - }, - { - "epoch": 0.005834615180607861, - "grad_norm": 5.021986484527588, - "learning_rate": 4.9902756413656535e-05, - "loss": 1.2974, - "step": 660 - }, - { - "epoch": 0.005923018440920101, - "grad_norm": 3.9169671535491943, - "learning_rate": 4.990128302598467e-05, - "loss": 1.3832, - "step": 670 - }, - { - "epoch": 0.006011421701232342, - "grad_norm": 5.65451192855835, - "learning_rate": 4.989980963831279e-05, - "loss": 1.3609, - "step": 680 - }, - { - "epoch": 0.006099824961544582, - "grad_norm": 3.694772243499756, - "learning_rate": 4.9898336250640927e-05, - "loss": 1.366, - "step": 690 - }, - { - "epoch": 0.0061882282218568225, - "grad_norm": 4.979604721069336, - "learning_rate": 4.9896862862969055e-05, - "loss": 1.2916, - "step": 700 - }, - { - "epoch": 0.006276631482169062, - "grad_norm": 3.898308515548706, - "learning_rate": 4.989538947529718e-05, - "loss": 1.342, - "step": 710 - }, - { - "epoch": 0.0063650347424813025, - "grad_norm": 7.851255416870117, - "learning_rate": 4.989391608762531e-05, - "loss": 1.2884, - "step": 720 - }, - { - "epoch": 0.006453438002793543, - "grad_norm": 5.869283199310303, - "learning_rate": 4.989244269995345e-05, - "loss": 1.2327, - "step": 730 - }, - { - "epoch": 0.006541841263105783, - "grad_norm": 3.6053364276885986, - "learning_rate": 4.989096931228157e-05, - "loss": 1.3692, - "step": 740 - }, - { - "epoch": 0.006630244523418024, - "grad_norm": 3.283907413482666, - "learning_rate": 4.9889495924609703e-05, - "loss": 1.2745, - "step": 750 - }, - { - "epoch": 0.006718647783730264, - "grad_norm": 4.512964248657227, - "learning_rate": 4.988802253693783e-05, - "loss": 1.3352, - "step": 760 - }, - { - "epoch": 0.006807051044042505, - "grad_norm": 4.550668716430664, - "learning_rate": 4.988654914926596e-05, - "loss": 1.3048, - "step": 770 - }, - { - "epoch": 0.006895454304354744, - "grad_norm": 3.349033832550049, - "learning_rate": 4.988507576159409e-05, - "loss": 1.2523, - "step": 780 - }, - { - "epoch": 0.006983857564666985, - "grad_norm": 4.971866607666016, - "learning_rate": 4.988360237392222e-05, - "loss": 1.2693, - "step": 790 - }, - { - "epoch": 0.007072260824979225, - "grad_norm": 3.0348961353302, - "learning_rate": 4.9882128986250345e-05, - "loss": 1.3548, - "step": 800 - }, - { - "epoch": 0.0071606640852914655, - "grad_norm": 3.880688428878784, - "learning_rate": 4.988065559857848e-05, - "loss": 1.2948, - "step": 810 - }, - { - "epoch": 0.007249067345603706, - "grad_norm": 3.494100570678711, - "learning_rate": 4.987918221090661e-05, - "loss": 1.2544, - "step": 820 - }, - { - "epoch": 0.007337470605915946, - "grad_norm": 6.909191608428955, - "learning_rate": 4.987770882323474e-05, - "loss": 1.3008, - "step": 830 - }, - { - "epoch": 0.007425873866228187, - "grad_norm": 4.434190273284912, - "learning_rate": 4.9876235435562865e-05, - "loss": 1.2514, - "step": 840 - }, - { - "epoch": 0.007514277126540427, - "grad_norm": 3.0245659351348877, - "learning_rate": 4.9874762047890994e-05, - "loss": 1.4152, - "step": 850 - }, - { - "epoch": 0.007602680386852667, - "grad_norm": 3.9230799674987793, - "learning_rate": 4.987328866021912e-05, - "loss": 1.4328, - "step": 860 - }, - { - "epoch": 0.007691083647164907, - "grad_norm": 3.984497547149658, - "learning_rate": 4.987181527254726e-05, - "loss": 1.257, - "step": 870 - }, - { - "epoch": 0.007779486907477148, - "grad_norm": 7.441206932067871, - "learning_rate": 4.9870341884875386e-05, - "loss": 1.304, - "step": 880 - }, - { - "epoch": 0.007867890167789389, - "grad_norm": 5.112447738647461, - "learning_rate": 4.9868868497203514e-05, - "loss": 1.3257, - "step": 890 - }, - { - "epoch": 0.007956293428101628, - "grad_norm": 2.8872740268707275, - "learning_rate": 4.986739510953164e-05, - "loss": 1.3393, - "step": 900 - }, - { - "epoch": 0.008044696688413868, - "grad_norm": 3.278285264968872, - "learning_rate": 4.986592172185977e-05, - "loss": 1.3374, - "step": 910 - }, - { - "epoch": 0.00813309994872611, - "grad_norm": 6.72818660736084, - "learning_rate": 4.98644483341879e-05, - "loss": 1.3533, - "step": 920 - }, - { - "epoch": 0.008221503209038349, - "grad_norm": 3.7416751384735107, - "learning_rate": 4.986297494651603e-05, - "loss": 1.2718, - "step": 930 - }, - { - "epoch": 0.00830990646935059, - "grad_norm": 5.336352825164795, - "learning_rate": 4.986150155884416e-05, - "loss": 1.276, - "step": 940 - }, - { - "epoch": 0.00839830972966283, - "grad_norm": 4.085439205169678, - "learning_rate": 4.986002817117229e-05, - "loss": 1.2924, - "step": 950 - }, - { - "epoch": 0.008486712989975071, - "grad_norm": 4.730822563171387, - "learning_rate": 4.985855478350042e-05, - "loss": 1.1914, - "step": 960 - }, - { - "epoch": 0.00857511625028731, - "grad_norm": 8.323603630065918, - "learning_rate": 4.985708139582855e-05, - "loss": 1.3709, - "step": 970 - }, - { - "epoch": 0.00866351951059955, - "grad_norm": 5.226520538330078, - "learning_rate": 4.9855608008156676e-05, - "loss": 1.3267, - "step": 980 - }, - { - "epoch": 0.008751922770911792, - "grad_norm": 6.736996650695801, - "learning_rate": 4.9854134620484804e-05, - "loss": 1.2823, - "step": 990 - }, - { - "epoch": 0.008840326031224031, - "grad_norm": 4.369340896606445, - "learning_rate": 4.985266123281294e-05, - "loss": 1.2985, - "step": 1000 - }, - { - "epoch": 0.008928729291536272, - "grad_norm": 4.70871639251709, - "learning_rate": 4.985118784514106e-05, - "loss": 1.3114, - "step": 1010 - }, - { - "epoch": 0.009017132551848512, - "grad_norm": 5.202262878417969, - "learning_rate": 4.9849714457469196e-05, - "loss": 1.3938, - "step": 1020 - }, - { - "epoch": 0.009105535812160753, - "grad_norm": 2.4306414127349854, - "learning_rate": 4.9848241069797324e-05, - "loss": 1.1892, - "step": 1030 - }, - { - "epoch": 0.009193939072472993, - "grad_norm": 8.203335762023926, - "learning_rate": 4.984676768212545e-05, - "loss": 1.1901, - "step": 1040 - }, - { - "epoch": 0.009282342332785232, - "grad_norm": 4.401038646697998, - "learning_rate": 4.984529429445358e-05, - "loss": 1.2973, - "step": 1050 - }, - { - "epoch": 0.009370745593097474, - "grad_norm": 4.717940330505371, - "learning_rate": 4.9843820906781716e-05, - "loss": 1.2784, - "step": 1060 - }, - { - "epoch": 0.009459148853409713, - "grad_norm": 2.6563339233398438, - "learning_rate": 4.984234751910984e-05, - "loss": 1.3232, - "step": 1070 - }, - { - "epoch": 0.009547552113721955, - "grad_norm": 5.412891864776611, - "learning_rate": 4.984087413143797e-05, - "loss": 1.2354, - "step": 1080 - }, - { - "epoch": 0.009635955374034194, - "grad_norm": 3.79567289352417, - "learning_rate": 4.98394007437661e-05, - "loss": 1.2998, - "step": 1090 - }, - { - "epoch": 0.009724358634346435, - "grad_norm": 6.965702533721924, - "learning_rate": 4.983792735609423e-05, - "loss": 1.1795, - "step": 1100 - }, - { - "epoch": 0.009812761894658675, - "grad_norm": 11.925300598144531, - "learning_rate": 4.983645396842236e-05, - "loss": 1.2163, - "step": 1110 - }, - { - "epoch": 0.009901165154970915, - "grad_norm": 4.589380264282227, - "learning_rate": 4.9834980580750486e-05, - "loss": 1.2232, - "step": 1120 - }, - { - "epoch": 0.009989568415283156, - "grad_norm": 4.49373722076416, - "learning_rate": 4.9833507193078615e-05, - "loss": 1.252, - "step": 1130 - }, - { - "epoch": 0.010077971675595395, - "grad_norm": 3.2139599323272705, - "learning_rate": 4.983203380540675e-05, - "loss": 1.2259, - "step": 1140 - }, - { - "epoch": 0.010166374935907637, - "grad_norm": 3.8811285495758057, - "learning_rate": 4.983056041773487e-05, - "loss": 1.2837, - "step": 1150 - }, - { - "epoch": 0.010254778196219876, - "grad_norm": 4.598730087280273, - "learning_rate": 4.982908703006301e-05, - "loss": 1.1905, - "step": 1160 - }, - { - "epoch": 0.010343181456532118, - "grad_norm": 5.53896951675415, - "learning_rate": 4.9827613642391135e-05, - "loss": 1.3271, - "step": 1170 - }, - { - "epoch": 0.010431584716844357, - "grad_norm": 3.8789303302764893, - "learning_rate": 4.982614025471926e-05, - "loss": 1.2986, - "step": 1180 - }, - { - "epoch": 0.010519987977156597, - "grad_norm": 5.903724193572998, - "learning_rate": 4.982466686704739e-05, - "loss": 1.2604, - "step": 1190 - }, - { - "epoch": 0.010608391237468838, - "grad_norm": 3.7968711853027344, - "learning_rate": 4.982319347937553e-05, - "loss": 1.3397, - "step": 1200 - }, - { - "epoch": 0.010696794497781078, - "grad_norm": 3.2264816761016846, - "learning_rate": 4.982172009170365e-05, - "loss": 1.2966, - "step": 1210 - }, - { - "epoch": 0.010785197758093319, - "grad_norm": 4.483380317687988, - "learning_rate": 4.9820246704031784e-05, - "loss": 1.2768, - "step": 1220 - }, - { - "epoch": 0.010873601018405558, - "grad_norm": 3.280275583267212, - "learning_rate": 4.981877331635991e-05, - "loss": 1.2943, - "step": 1230 - }, - { - "epoch": 0.0109620042787178, - "grad_norm": 3.700066566467285, - "learning_rate": 4.981729992868804e-05, - "loss": 1.2548, - "step": 1240 - }, - { - "epoch": 0.01105040753903004, - "grad_norm": 6.6137237548828125, - "learning_rate": 4.981582654101617e-05, - "loss": 1.2147, - "step": 1250 - }, - { - "epoch": 0.01113881079934228, - "grad_norm": 3.8108301162719727, - "learning_rate": 4.98143531533443e-05, - "loss": 1.2061, - "step": 1260 - }, - { - "epoch": 0.01122721405965452, - "grad_norm": 5.0468363761901855, - "learning_rate": 4.9812879765672425e-05, - "loss": 1.2851, - "step": 1270 - }, - { - "epoch": 0.01131561731996676, - "grad_norm": 4.338964939117432, - "learning_rate": 4.981140637800056e-05, - "loss": 1.2716, - "step": 1280 - }, - { - "epoch": 0.011404020580279001, - "grad_norm": 6.1364288330078125, - "learning_rate": 4.980993299032868e-05, - "loss": 1.1857, - "step": 1290 - }, - { - "epoch": 0.01149242384059124, - "grad_norm": 9.080232620239258, - "learning_rate": 4.980845960265682e-05, - "loss": 1.3553, - "step": 1300 - }, - { - "epoch": 0.011580827100903482, - "grad_norm": 3.361820697784424, - "learning_rate": 4.9806986214984946e-05, - "loss": 1.3031, - "step": 1310 - }, - { - "epoch": 0.011669230361215721, - "grad_norm": 4.053516864776611, - "learning_rate": 4.9805512827313074e-05, - "loss": 1.2737, - "step": 1320 - }, - { - "epoch": 0.011757633621527963, - "grad_norm": 4.365649700164795, - "learning_rate": 4.98040394396412e-05, - "loss": 1.2599, - "step": 1330 - }, - { - "epoch": 0.011846036881840202, - "grad_norm": 4.1593475341796875, - "learning_rate": 4.980256605196934e-05, - "loss": 1.3096, - "step": 1340 - }, - { - "epoch": 0.011934440142152442, - "grad_norm": 3.454594850540161, - "learning_rate": 4.980109266429746e-05, - "loss": 1.1883, - "step": 1350 - }, - { - "epoch": 0.012022843402464683, - "grad_norm": 5.860595703125, - "learning_rate": 4.9799619276625594e-05, - "loss": 1.1906, - "step": 1360 - }, - { - "epoch": 0.012111246662776923, - "grad_norm": 4.583675861358643, - "learning_rate": 4.9798145888953716e-05, - "loss": 1.2182, - "step": 1370 - }, - { - "epoch": 0.012199649923089164, - "grad_norm": 4.073757171630859, - "learning_rate": 4.979667250128185e-05, - "loss": 1.3099, - "step": 1380 - }, - { - "epoch": 0.012288053183401404, - "grad_norm": 6.656314373016357, - "learning_rate": 4.979519911360998e-05, - "loss": 1.1719, - "step": 1390 - }, - { - "epoch": 0.012376456443713645, - "grad_norm": 4.785161018371582, - "learning_rate": 4.979372572593811e-05, - "loss": 1.2248, - "step": 1400 - }, - { - "epoch": 0.012464859704025884, - "grad_norm": 4.953686237335205, - "learning_rate": 4.9792252338266236e-05, - "loss": 1.1863, - "step": 1410 - }, - { - "epoch": 0.012553262964338124, - "grad_norm": 6.9218268394470215, - "learning_rate": 4.979077895059437e-05, - "loss": 1.1563, - "step": 1420 - }, - { - "epoch": 0.012641666224650365, - "grad_norm": 6.062386989593506, - "learning_rate": 4.978930556292249e-05, - "loss": 1.2917, - "step": 1430 - }, - { - "epoch": 0.012730069484962605, - "grad_norm": 7.698704719543457, - "learning_rate": 4.978783217525063e-05, - "loss": 1.2457, - "step": 1440 - }, - { - "epoch": 0.012818472745274846, - "grad_norm": 6.8854169845581055, - "learning_rate": 4.9786358787578756e-05, - "loss": 1.255, - "step": 1450 - }, - { - "epoch": 0.012906876005587086, - "grad_norm": 12.989723205566406, - "learning_rate": 4.9784885399906884e-05, - "loss": 1.1496, - "step": 1460 - }, - { - "epoch": 0.012995279265899327, - "grad_norm": 6.384410858154297, - "learning_rate": 4.978341201223501e-05, - "loss": 1.2042, - "step": 1470 - }, - { - "epoch": 0.013083682526211567, - "grad_norm": 5.162613391876221, - "learning_rate": 4.978193862456314e-05, - "loss": 1.1764, - "step": 1480 - }, - { - "epoch": 0.013172085786523806, - "grad_norm": 7.121384620666504, - "learning_rate": 4.978046523689127e-05, - "loss": 1.2491, - "step": 1490 - }, - { - "epoch": 0.013260489046836047, - "grad_norm": 5.269081115722656, - "learning_rate": 4.9778991849219405e-05, - "loss": 1.1909, - "step": 1500 - }, - { - "epoch": 0.013348892307148287, - "grad_norm": 3.8427023887634277, - "learning_rate": 4.9777518461547526e-05, - "loss": 1.2535, - "step": 1510 - }, - { - "epoch": 0.013437295567460528, - "grad_norm": 4.315703392028809, - "learning_rate": 4.977604507387566e-05, - "loss": 1.286, - "step": 1520 - }, - { - "epoch": 0.013525698827772768, - "grad_norm": 6.8337578773498535, - "learning_rate": 4.977457168620379e-05, - "loss": 1.237, - "step": 1530 - }, - { - "epoch": 0.01361410208808501, - "grad_norm": 3.717587471008301, - "learning_rate": 4.977309829853192e-05, - "loss": 1.1511, - "step": 1540 - }, - { - "epoch": 0.013702505348397249, - "grad_norm": 5.48192024230957, - "learning_rate": 4.9771624910860046e-05, - "loss": 1.148, - "step": 1550 - }, - { - "epoch": 0.013790908608709488, - "grad_norm": 5.219634532928467, - "learning_rate": 4.977015152318818e-05, - "loss": 1.234, - "step": 1560 - }, - { - "epoch": 0.01387931186902173, - "grad_norm": 4.309090614318848, - "learning_rate": 4.97686781355163e-05, - "loss": 1.1488, - "step": 1570 - }, - { - "epoch": 0.01396771512933397, - "grad_norm": 6.279152870178223, - "learning_rate": 4.976720474784444e-05, - "loss": 1.1941, - "step": 1580 - }, - { - "epoch": 0.01405611838964621, - "grad_norm": 5.642574310302734, - "learning_rate": 4.9765731360172567e-05, - "loss": 1.3957, - "step": 1590 - }, - { - "epoch": 0.01414452164995845, - "grad_norm": 5.639019012451172, - "learning_rate": 4.9764257972500695e-05, - "loss": 1.251, - "step": 1600 - }, - { - "epoch": 0.014232924910270691, - "grad_norm": 3.8754444122314453, - "learning_rate": 4.976278458482882e-05, - "loss": 1.2151, - "step": 1610 - }, - { - "epoch": 0.014321328170582931, - "grad_norm": 3.1772994995117188, - "learning_rate": 4.976131119715695e-05, - "loss": 1.2177, - "step": 1620 - }, - { - "epoch": 0.014409731430895172, - "grad_norm": 6.411615371704102, - "learning_rate": 4.975983780948508e-05, - "loss": 1.1621, - "step": 1630 - }, - { - "epoch": 0.014498134691207412, - "grad_norm": 3.6244139671325684, - "learning_rate": 4.9758364421813215e-05, - "loss": 1.2433, - "step": 1640 - }, - { - "epoch": 0.014586537951519651, - "grad_norm": 3.861219882965088, - "learning_rate": 4.975689103414134e-05, - "loss": 1.1503, - "step": 1650 - }, - { - "epoch": 0.014674941211831893, - "grad_norm": 6.717658519744873, - "learning_rate": 4.975541764646947e-05, - "loss": 1.1981, - "step": 1660 - }, - { - "epoch": 0.014763344472144132, - "grad_norm": 4.457257270812988, - "learning_rate": 4.97539442587976e-05, - "loss": 1.173, - "step": 1670 - }, - { - "epoch": 0.014851747732456374, - "grad_norm": 4.979446887969971, - "learning_rate": 4.975247087112573e-05, - "loss": 1.1214, - "step": 1680 - }, - { - "epoch": 0.014940150992768613, - "grad_norm": 4.89369010925293, - "learning_rate": 4.975099748345386e-05, - "loss": 1.2174, - "step": 1690 - }, - { - "epoch": 0.015028554253080854, - "grad_norm": 5.477513790130615, - "learning_rate": 4.974952409578199e-05, - "loss": 1.1146, - "step": 1700 - }, - { - "epoch": 0.015116957513393094, - "grad_norm": 4.475773334503174, - "learning_rate": 4.9748050708110114e-05, - "loss": 1.1294, - "step": 1710 - }, - { - "epoch": 0.015205360773705334, - "grad_norm": 4.1478190422058105, - "learning_rate": 4.974657732043825e-05, - "loss": 1.2346, - "step": 1720 - }, - { - "epoch": 0.015293764034017575, - "grad_norm": 4.169029235839844, - "learning_rate": 4.974510393276638e-05, - "loss": 1.184, - "step": 1730 - }, - { - "epoch": 0.015382167294329814, - "grad_norm": 3.632573366165161, - "learning_rate": 4.9743630545094505e-05, - "loss": 1.281, - "step": 1740 - }, - { - "epoch": 0.015470570554642056, - "grad_norm": 3.9423365592956543, - "learning_rate": 4.9742157157422634e-05, - "loss": 1.0858, - "step": 1750 - }, - { - "epoch": 0.015558973814954295, - "grad_norm": 7.415809154510498, - "learning_rate": 4.974068376975076e-05, - "loss": 1.1911, - "step": 1760 - }, - { - "epoch": 0.015647377075266537, - "grad_norm": 6.262916564941406, - "learning_rate": 4.973921038207889e-05, - "loss": 1.1719, - "step": 1770 - }, - { - "epoch": 0.015735780335578778, - "grad_norm": 5.934061527252197, - "learning_rate": 4.9737736994407026e-05, - "loss": 1.2425, - "step": 1780 - }, - { - "epoch": 0.015824183595891016, - "grad_norm": 3.6301071643829346, - "learning_rate": 4.9736263606735154e-05, - "loss": 1.2585, - "step": 1790 - }, - { - "epoch": 0.015912586856203257, - "grad_norm": 4.821804046630859, - "learning_rate": 4.973479021906328e-05, - "loss": 1.2402, - "step": 1800 - }, - { - "epoch": 0.0160009901165155, - "grad_norm": 5.870527267456055, - "learning_rate": 4.973331683139141e-05, - "loss": 1.1582, - "step": 1810 - }, - { - "epoch": 0.016089393376827736, - "grad_norm": 3.978375196456909, - "learning_rate": 4.973184344371954e-05, - "loss": 1.2734, - "step": 1820 - }, - { - "epoch": 0.016177796637139977, - "grad_norm": 3.5663650035858154, - "learning_rate": 4.973037005604767e-05, - "loss": 1.2663, - "step": 1830 - }, - { - "epoch": 0.01626619989745222, - "grad_norm": 4.388373851776123, - "learning_rate": 4.9728896668375796e-05, - "loss": 1.2131, - "step": 1840 - }, - { - "epoch": 0.01635460315776446, - "grad_norm": 7.263591289520264, - "learning_rate": 4.972742328070393e-05, - "loss": 1.1366, - "step": 1850 - }, - { - "epoch": 0.016443006418076698, - "grad_norm": 3.496192455291748, - "learning_rate": 4.972594989303206e-05, - "loss": 1.1126, - "step": 1860 - }, - { - "epoch": 0.01653140967838894, - "grad_norm": 4.2536702156066895, - "learning_rate": 4.972447650536019e-05, - "loss": 1.2666, - "step": 1870 - }, - { - "epoch": 0.01661981293870118, - "grad_norm": 3.563420534133911, - "learning_rate": 4.9723003117688316e-05, - "loss": 1.1801, - "step": 1880 - }, - { - "epoch": 0.01670821619901342, - "grad_norm": 4.134334564208984, - "learning_rate": 4.9721529730016444e-05, - "loss": 1.2148, - "step": 1890 - }, - { - "epoch": 0.01679661945932566, - "grad_norm": 4.57625675201416, - "learning_rate": 4.972005634234457e-05, - "loss": 1.0861, - "step": 1900 - }, - { - "epoch": 0.0168850227196379, - "grad_norm": 6.389843463897705, - "learning_rate": 4.971858295467271e-05, - "loss": 1.1459, - "step": 1910 - }, - { - "epoch": 0.016973425979950142, - "grad_norm": 5.516792297363281, - "learning_rate": 4.9717109567000836e-05, - "loss": 1.2473, - "step": 1920 - }, - { - "epoch": 0.01706182924026238, - "grad_norm": 4.792562961578369, - "learning_rate": 4.9715636179328964e-05, - "loss": 1.253, - "step": 1930 - }, - { - "epoch": 0.01715023250057462, - "grad_norm": 3.1397716999053955, - "learning_rate": 4.971416279165709e-05, - "loss": 1.1486, - "step": 1940 - }, - { - "epoch": 0.017238635760886863, - "grad_norm": 4.664846897125244, - "learning_rate": 4.971268940398522e-05, - "loss": 1.2853, - "step": 1950 - }, - { - "epoch": 0.0173270390211991, - "grad_norm": 3.8001105785369873, - "learning_rate": 4.971121601631335e-05, - "loss": 1.2221, - "step": 1960 - }, - { - "epoch": 0.017415442281511342, - "grad_norm": 4.641026973724365, - "learning_rate": 4.9709742628641485e-05, - "loss": 1.2473, - "step": 1970 - }, - { - "epoch": 0.017503845541823583, - "grad_norm": 2.9220542907714844, - "learning_rate": 4.9708269240969606e-05, - "loss": 1.201, - "step": 1980 - }, - { - "epoch": 0.017592248802135824, - "grad_norm": 4.261163234710693, - "learning_rate": 4.970679585329774e-05, - "loss": 1.2117, - "step": 1990 - }, - { - "epoch": 0.017680652062448062, - "grad_norm": 8.084556579589844, - "learning_rate": 4.970532246562587e-05, - "loss": 1.2751, - "step": 2000 - }, - { - "epoch": 0.017769055322760303, - "grad_norm": 3.9785537719726562, - "learning_rate": 4.9703849077954e-05, - "loss": 1.2035, - "step": 2010 - }, - { - "epoch": 0.017857458583072545, - "grad_norm": 4.966981887817383, - "learning_rate": 4.9702375690282126e-05, - "loss": 1.0638, - "step": 2020 - }, - { - "epoch": 0.017945861843384783, - "grad_norm": 3.9670522212982178, - "learning_rate": 4.970090230261026e-05, - "loss": 1.2402, - "step": 2030 - }, - { - "epoch": 0.018034265103697024, - "grad_norm": 6.027310371398926, - "learning_rate": 4.969942891493838e-05, - "loss": 1.1792, - "step": 2040 - }, - { - "epoch": 0.018122668364009265, - "grad_norm": 8.415605545043945, - "learning_rate": 4.969795552726652e-05, - "loss": 1.1563, - "step": 2050 - }, - { - "epoch": 0.018211071624321507, - "grad_norm": 4.428287506103516, - "learning_rate": 4.9696482139594647e-05, - "loss": 1.2599, - "step": 2060 - }, - { - "epoch": 0.018299474884633744, - "grad_norm": 3.5284461975097656, - "learning_rate": 4.9695008751922775e-05, - "loss": 1.2318, - "step": 2070 - }, - { - "epoch": 0.018387878144945986, - "grad_norm": 8.998053550720215, - "learning_rate": 4.96935353642509e-05, - "loss": 1.1768, - "step": 2080 - }, - { - "epoch": 0.018476281405258227, - "grad_norm": 7.443400859832764, - "learning_rate": 4.969206197657903e-05, - "loss": 1.2308, - "step": 2090 - }, - { - "epoch": 0.018564684665570465, - "grad_norm": 5.766305923461914, - "learning_rate": 4.969058858890716e-05, - "loss": 1.2445, - "step": 2100 - }, - { - "epoch": 0.018653087925882706, - "grad_norm": 4.347663879394531, - "learning_rate": 4.9689115201235295e-05, - "loss": 1.1199, - "step": 2110 - }, - { - "epoch": 0.018741491186194947, - "grad_norm": 7.1854705810546875, - "learning_rate": 4.968764181356342e-05, - "loss": 1.1827, - "step": 2120 - }, - { - "epoch": 0.01882989444650719, - "grad_norm": 8.310243606567383, - "learning_rate": 4.968616842589155e-05, - "loss": 1.1784, - "step": 2130 - }, - { - "epoch": 0.018918297706819426, - "grad_norm": 3.564779043197632, - "learning_rate": 4.968469503821968e-05, - "loss": 1.1829, - "step": 2140 - }, - { - "epoch": 0.019006700967131668, - "grad_norm": 4.239924907684326, - "learning_rate": 4.968322165054781e-05, - "loss": 1.1483, - "step": 2150 - }, - { - "epoch": 0.01909510422744391, - "grad_norm": 3.9210386276245117, - "learning_rate": 4.968174826287594e-05, - "loss": 1.1491, - "step": 2160 - }, - { - "epoch": 0.019183507487756147, - "grad_norm": 3.756441831588745, - "learning_rate": 4.968027487520407e-05, - "loss": 1.1951, - "step": 2170 - }, - { - "epoch": 0.019271910748068388, - "grad_norm": 6.451724529266357, - "learning_rate": 4.9678801487532194e-05, - "loss": 1.1629, - "step": 2180 - }, - { - "epoch": 0.01936031400838063, - "grad_norm": 6.526517391204834, - "learning_rate": 4.967732809986033e-05, - "loss": 1.1858, - "step": 2190 - }, - { - "epoch": 0.01944871726869287, - "grad_norm": 3.928395986557007, - "learning_rate": 4.967585471218845e-05, - "loss": 1.1959, - "step": 2200 - }, - { - "epoch": 0.01953712052900511, - "grad_norm": 5.018512725830078, - "learning_rate": 4.9674381324516585e-05, - "loss": 1.1671, - "step": 2210 - }, - { - "epoch": 0.01962552378931735, - "grad_norm": 5.034059524536133, - "learning_rate": 4.9672907936844714e-05, - "loss": 1.1155, - "step": 2220 - }, - { - "epoch": 0.01971392704962959, - "grad_norm": 7.713180065155029, - "learning_rate": 4.967143454917284e-05, - "loss": 0.9553, - "step": 2230 - }, - { - "epoch": 0.01980233030994183, - "grad_norm": 4.939086437225342, - "learning_rate": 4.966996116150097e-05, - "loss": 1.2005, - "step": 2240 - }, - { - "epoch": 0.01989073357025407, - "grad_norm": 5.7985920906066895, - "learning_rate": 4.9668487773829106e-05, - "loss": 1.2426, - "step": 2250 - }, - { - "epoch": 0.01997913683056631, - "grad_norm": 3.0697240829467773, - "learning_rate": 4.966701438615723e-05, - "loss": 1.2048, - "step": 2260 - }, - { - "epoch": 0.020067540090878553, - "grad_norm": 5.362508773803711, - "learning_rate": 4.966554099848536e-05, - "loss": 1.2438, - "step": 2270 - }, - { - "epoch": 0.02015594335119079, - "grad_norm": 3.468385934829712, - "learning_rate": 4.966406761081349e-05, - "loss": 1.1942, - "step": 2280 - }, - { - "epoch": 0.020244346611503032, - "grad_norm": 5.108860015869141, - "learning_rate": 4.966259422314162e-05, - "loss": 1.1456, - "step": 2290 - }, - { - "epoch": 0.020332749871815273, - "grad_norm": 4.569157600402832, - "learning_rate": 4.966112083546975e-05, - "loss": 1.1289, - "step": 2300 - }, - { - "epoch": 0.02042115313212751, - "grad_norm": 8.107612609863281, - "learning_rate": 4.9659647447797876e-05, - "loss": 1.0621, - "step": 2310 - }, - { - "epoch": 0.020509556392439753, - "grad_norm": 3.4186110496520996, - "learning_rate": 4.9658174060126004e-05, - "loss": 1.0888, - "step": 2320 - }, - { - "epoch": 0.020597959652751994, - "grad_norm": 6.030848979949951, - "learning_rate": 4.965670067245414e-05, - "loss": 1.1178, - "step": 2330 - }, - { - "epoch": 0.020686362913064235, - "grad_norm": 3.222794532775879, - "learning_rate": 4.965522728478226e-05, - "loss": 1.1568, - "step": 2340 - }, - { - "epoch": 0.020774766173376473, - "grad_norm": 4.0737481117248535, - "learning_rate": 4.9653753897110396e-05, - "loss": 1.1832, - "step": 2350 - }, - { - "epoch": 0.020863169433688714, - "grad_norm": 4.104101657867432, - "learning_rate": 4.9652280509438524e-05, - "loss": 1.2447, - "step": 2360 - }, - { - "epoch": 0.020951572694000956, - "grad_norm": 6.571559429168701, - "learning_rate": 4.965080712176665e-05, - "loss": 1.1735, - "step": 2370 - }, - { - "epoch": 0.021039975954313193, - "grad_norm": 3.1230075359344482, - "learning_rate": 4.964933373409478e-05, - "loss": 1.1589, - "step": 2380 - }, - { - "epoch": 0.021128379214625435, - "grad_norm": 4.8723835945129395, - "learning_rate": 4.9647860346422916e-05, - "loss": 1.2285, - "step": 2390 - }, - { - "epoch": 0.021216782474937676, - "grad_norm": 3.4131295680999756, - "learning_rate": 4.964638695875104e-05, - "loss": 1.069, - "step": 2400 - }, - { - "epoch": 0.021305185735249917, - "grad_norm": 13.047353744506836, - "learning_rate": 4.964491357107917e-05, - "loss": 1.0728, - "step": 2410 - }, - { - "epoch": 0.021393588995562155, - "grad_norm": 4.622305393218994, - "learning_rate": 4.9643440183407294e-05, - "loss": 1.0736, - "step": 2420 - }, - { - "epoch": 0.021481992255874396, - "grad_norm": 4.833291053771973, - "learning_rate": 4.964196679573543e-05, - "loss": 1.1708, - "step": 2430 - }, - { - "epoch": 0.021570395516186638, - "grad_norm": 5.150927543640137, - "learning_rate": 4.964049340806356e-05, - "loss": 1.2133, - "step": 2440 - }, - { - "epoch": 0.021658798776498876, - "grad_norm": 5.4327592849731445, - "learning_rate": 4.9639020020391686e-05, - "loss": 1.0678, - "step": 2450 - }, - { - "epoch": 0.021747202036811117, - "grad_norm": 7.642373561859131, - "learning_rate": 4.9637546632719815e-05, - "loss": 1.2008, - "step": 2460 - }, - { - "epoch": 0.021835605297123358, - "grad_norm": 7.505542278289795, - "learning_rate": 4.963607324504795e-05, - "loss": 0.9884, - "step": 2470 - }, - { - "epoch": 0.0219240085574356, - "grad_norm": 5.696202278137207, - "learning_rate": 4.963459985737607e-05, - "loss": 1.0402, - "step": 2480 - }, - { - "epoch": 0.022012411817747837, - "grad_norm": 5.025146007537842, - "learning_rate": 4.9633126469704206e-05, - "loss": 1.16, - "step": 2490 - }, - { - "epoch": 0.02210081507806008, - "grad_norm": 4.16343879699707, - "learning_rate": 4.9631653082032335e-05, - "loss": 1.1264, - "step": 2500 - }, - { - "epoch": 0.02218921833837232, - "grad_norm": 4.130855560302734, - "learning_rate": 4.963017969436046e-05, - "loss": 1.0499, - "step": 2510 - }, - { - "epoch": 0.02227762159868456, - "grad_norm": 6.513915061950684, - "learning_rate": 4.962870630668859e-05, - "loss": 1.1892, - "step": 2520 - }, - { - "epoch": 0.0223660248589968, - "grad_norm": 7.0379486083984375, - "learning_rate": 4.962723291901673e-05, - "loss": 1.058, - "step": 2530 - }, - { - "epoch": 0.02245442811930904, - "grad_norm": 4.891809463500977, - "learning_rate": 4.962575953134485e-05, - "loss": 1.1433, - "step": 2540 - }, - { - "epoch": 0.02254283137962128, - "grad_norm": 2.6610805988311768, - "learning_rate": 4.962428614367298e-05, - "loss": 1.396, - "step": 2550 - }, - { - "epoch": 0.02263123463993352, - "grad_norm": 4.087412357330322, - "learning_rate": 4.9622812756001105e-05, - "loss": 1.1833, - "step": 2560 - }, - { - "epoch": 0.02271963790024576, - "grad_norm": 4.190392017364502, - "learning_rate": 4.962133936832924e-05, - "loss": 1.0581, - "step": 2570 - }, - { - "epoch": 0.022808041160558002, - "grad_norm": 5.221377849578857, - "learning_rate": 4.961986598065737e-05, - "loss": 0.9584, - "step": 2580 - }, - { - "epoch": 0.022896444420870243, - "grad_norm": 6.462989330291748, - "learning_rate": 4.96183925929855e-05, - "loss": 1.1468, - "step": 2590 - }, - { - "epoch": 0.02298484768118248, - "grad_norm": 5.1533308029174805, - "learning_rate": 4.9616919205313625e-05, - "loss": 1.1259, - "step": 2600 - }, - { - "epoch": 0.023073250941494722, - "grad_norm": 4.590929985046387, - "learning_rate": 4.961544581764176e-05, - "loss": 1.1365, - "step": 2610 - }, - { - "epoch": 0.023161654201806964, - "grad_norm": 4.935781478881836, - "learning_rate": 4.961397242996988e-05, - "loss": 1.154, - "step": 2620 - }, - { - "epoch": 0.0232500574621192, - "grad_norm": 6.083306312561035, - "learning_rate": 4.961249904229802e-05, - "loss": 1.008, - "step": 2630 - }, - { - "epoch": 0.023338460722431443, - "grad_norm": 3.8785037994384766, - "learning_rate": 4.9611025654626145e-05, - "loss": 1.1141, - "step": 2640 - }, - { - "epoch": 0.023426863982743684, - "grad_norm": 2.921254873275757, - "learning_rate": 4.9609552266954274e-05, - "loss": 1.1871, - "step": 2650 - }, - { - "epoch": 0.023515267243055926, - "grad_norm": 3.7994141578674316, - "learning_rate": 4.96080788792824e-05, - "loss": 1.2912, - "step": 2660 - }, - { - "epoch": 0.023603670503368163, - "grad_norm": 3.8834662437438965, - "learning_rate": 4.960660549161053e-05, - "loss": 1.206, - "step": 2670 - }, - { - "epoch": 0.023692073763680405, - "grad_norm": 3.5875086784362793, - "learning_rate": 4.960513210393866e-05, - "loss": 1.0901, - "step": 2680 - }, - { - "epoch": 0.023780477023992646, - "grad_norm": 6.173762321472168, - "learning_rate": 4.9603658716266794e-05, - "loss": 1.0821, - "step": 2690 - }, - { - "epoch": 0.023868880284304884, - "grad_norm": 5.421717166900635, - "learning_rate": 4.960218532859492e-05, - "loss": 1.1437, - "step": 2700 - }, - { - "epoch": 0.023957283544617125, - "grad_norm": 3.7461869716644287, - "learning_rate": 4.960071194092305e-05, - "loss": 1.063, - "step": 2710 - }, - { - "epoch": 0.024045686804929366, - "grad_norm": 3.8545279502868652, - "learning_rate": 4.959923855325118e-05, - "loss": 1.1014, - "step": 2720 - }, - { - "epoch": 0.024134090065241608, - "grad_norm": 7.023557186126709, - "learning_rate": 4.959776516557931e-05, - "loss": 1.1094, - "step": 2730 - }, - { - "epoch": 0.024222493325553845, - "grad_norm": 4.638785362243652, - "learning_rate": 4.9596291777907436e-05, - "loss": 1.1082, - "step": 2740 - }, - { - "epoch": 0.024310896585866087, - "grad_norm": 3.8988077640533447, - "learning_rate": 4.959481839023557e-05, - "loss": 1.0179, - "step": 2750 - }, - { - "epoch": 0.024399299846178328, - "grad_norm": 6.727329254150391, - "learning_rate": 4.95933450025637e-05, - "loss": 1.1137, - "step": 2760 - }, - { - "epoch": 0.024487703106490566, - "grad_norm": 5.458581924438477, - "learning_rate": 4.959187161489183e-05, - "loss": 1.0253, - "step": 2770 - }, - { - "epoch": 0.024576106366802807, - "grad_norm": 4.3151469230651855, - "learning_rate": 4.9590398227219956e-05, - "loss": 1.1655, - "step": 2780 - }, - { - "epoch": 0.02466450962711505, - "grad_norm": 3.221970796585083, - "learning_rate": 4.9588924839548084e-05, - "loss": 1.2279, - "step": 2790 - }, - { - "epoch": 0.02475291288742729, - "grad_norm": 5.484412670135498, - "learning_rate": 4.958745145187621e-05, - "loss": 1.1114, - "step": 2800 - }, - { - "epoch": 0.024841316147739528, - "grad_norm": 3.404670476913452, - "learning_rate": 4.958597806420434e-05, - "loss": 1.1584, - "step": 2810 - }, - { - "epoch": 0.02492971940805177, - "grad_norm": 4.5648298263549805, - "learning_rate": 4.9584504676532476e-05, - "loss": 0.9469, - "step": 2820 - }, - { - "epoch": 0.02501812266836401, - "grad_norm": 5.958621501922607, - "learning_rate": 4.9583031288860604e-05, - "loss": 1.1678, - "step": 2830 - }, - { - "epoch": 0.025106525928676248, - "grad_norm": 4.206620693206787, - "learning_rate": 4.958155790118873e-05, - "loss": 1.2014, - "step": 2840 - }, - { - "epoch": 0.02519492918898849, - "grad_norm": 3.300164222717285, - "learning_rate": 4.958008451351686e-05, - "loss": 1.0201, - "step": 2850 - }, - { - "epoch": 0.02528333244930073, - "grad_norm": 5.670661926269531, - "learning_rate": 4.957861112584499e-05, - "loss": 1.1142, - "step": 2860 - }, - { - "epoch": 0.025371735709612972, - "grad_norm": 5.453832626342773, - "learning_rate": 4.957713773817312e-05, - "loss": 1.1028, - "step": 2870 - }, - { - "epoch": 0.02546013896992521, - "grad_norm": 4.52194356918335, - "learning_rate": 4.957566435050125e-05, - "loss": 1.1089, - "step": 2880 - }, - { - "epoch": 0.02554854223023745, - "grad_norm": 5.5644073486328125, - "learning_rate": 4.9574190962829375e-05, - "loss": 1.0958, - "step": 2890 - }, - { - "epoch": 0.025636945490549692, - "grad_norm": 6.353917121887207, - "learning_rate": 4.957271757515751e-05, - "loss": 1.0777, - "step": 2900 - }, - { - "epoch": 0.02572534875086193, - "grad_norm": 5.321937561035156, - "learning_rate": 4.957124418748564e-05, - "loss": 1.0605, - "step": 2910 - }, - { - "epoch": 0.02581375201117417, - "grad_norm": 6.517303466796875, - "learning_rate": 4.9569770799813766e-05, - "loss": 1.0668, - "step": 2920 - }, - { - "epoch": 0.025902155271486413, - "grad_norm": 5.2277374267578125, - "learning_rate": 4.9568297412141895e-05, - "loss": 1.05, - "step": 2930 - }, - { - "epoch": 0.025990558531798654, - "grad_norm": 6.279105186462402, - "learning_rate": 4.956682402447003e-05, - "loss": 1.0839, - "step": 2940 - }, - { - "epoch": 0.026078961792110892, - "grad_norm": 4.64212703704834, - "learning_rate": 4.956535063679815e-05, - "loss": 1.044, - "step": 2950 - }, - { - "epoch": 0.026167365052423133, - "grad_norm": 7.826188564300537, - "learning_rate": 4.9563877249126287e-05, - "loss": 1.1387, - "step": 2960 - }, - { - "epoch": 0.026255768312735375, - "grad_norm": 5.159195899963379, - "learning_rate": 4.9562403861454415e-05, - "loss": 1.0018, - "step": 2970 - }, - { - "epoch": 0.026344171573047612, - "grad_norm": 13.026447296142578, - "learning_rate": 4.956093047378254e-05, - "loss": 1.2087, - "step": 2980 - }, - { - "epoch": 0.026432574833359854, - "grad_norm": 4.535762310028076, - "learning_rate": 4.955945708611067e-05, - "loss": 1.1684, - "step": 2990 - }, - { - "epoch": 0.026520978093672095, - "grad_norm": 3.30999755859375, - "learning_rate": 4.955798369843881e-05, - "loss": 1.0433, - "step": 3000 - }, - { - "epoch": 0.026609381353984336, - "grad_norm": 7.665961742401123, - "learning_rate": 4.955651031076693e-05, - "loss": 1.1903, - "step": 3010 - }, - { - "epoch": 0.026697784614296574, - "grad_norm": 5.118757724761963, - "learning_rate": 4.9555036923095063e-05, - "loss": 1.121, - "step": 3020 - }, - { - "epoch": 0.026786187874608815, - "grad_norm": 6.991201877593994, - "learning_rate": 4.9553563535423185e-05, - "loss": 1.1503, - "step": 3030 - }, - { - "epoch": 0.026874591134921057, - "grad_norm": 5.291536808013916, - "learning_rate": 4.955209014775132e-05, - "loss": 1.0973, - "step": 3040 - }, - { - "epoch": 0.026962994395233295, - "grad_norm": 5.872868537902832, - "learning_rate": 4.955061676007945e-05, - "loss": 1.1029, - "step": 3050 - }, - { - "epoch": 0.027051397655545536, - "grad_norm": 3.444056987762451, - "learning_rate": 4.954914337240758e-05, - "loss": 1.0218, - "step": 3060 - }, - { - "epoch": 0.027139800915857777, - "grad_norm": 6.330752372741699, - "learning_rate": 4.9547669984735705e-05, - "loss": 1.0465, - "step": 3070 - }, - { - "epoch": 0.02722820417617002, - "grad_norm": 7.991491317749023, - "learning_rate": 4.954619659706384e-05, - "loss": 1.0652, - "step": 3080 - }, - { - "epoch": 0.027316607436482256, - "grad_norm": 6.847014904022217, - "learning_rate": 4.954472320939196e-05, - "loss": 1.2172, - "step": 3090 - }, - { - "epoch": 0.027405010696794498, - "grad_norm": 8.696152687072754, - "learning_rate": 4.95432498217201e-05, - "loss": 1.1707, - "step": 3100 - }, - { - "epoch": 0.02749341395710674, - "grad_norm": 4.785496234893799, - "learning_rate": 4.9541776434048225e-05, - "loss": 1.0667, - "step": 3110 - }, - { - "epoch": 0.027581817217418977, - "grad_norm": 8.054608345031738, - "learning_rate": 4.9540303046376354e-05, - "loss": 1.204, - "step": 3120 - }, - { - "epoch": 0.027670220477731218, - "grad_norm": 6.484571933746338, - "learning_rate": 4.953882965870448e-05, - "loss": 1.1561, - "step": 3130 - }, - { - "epoch": 0.02775862373804346, - "grad_norm": 3.432863235473633, - "learning_rate": 4.953735627103261e-05, - "loss": 1.1016, - "step": 3140 - }, - { - "epoch": 0.0278470269983557, - "grad_norm": 4.097947597503662, - "learning_rate": 4.953588288336074e-05, - "loss": 1.0483, - "step": 3150 - }, - { - "epoch": 0.02793543025866794, - "grad_norm": 6.800324440002441, - "learning_rate": 4.9534409495688874e-05, - "loss": 1.0975, - "step": 3160 - }, - { - "epoch": 0.02802383351898018, - "grad_norm": 19.81365203857422, - "learning_rate": 4.9532936108016996e-05, - "loss": 1.1163, - "step": 3170 - }, - { - "epoch": 0.02811223677929242, - "grad_norm": 4.010586738586426, - "learning_rate": 4.953146272034513e-05, - "loss": 1.0635, - "step": 3180 - }, - { - "epoch": 0.028200640039604662, - "grad_norm": 5.544920444488525, - "learning_rate": 4.952998933267326e-05, - "loss": 1.0454, - "step": 3190 - }, - { - "epoch": 0.0282890432999169, - "grad_norm": 10.799997329711914, - "learning_rate": 4.952851594500139e-05, - "loss": 1.0682, - "step": 3200 - }, - { - "epoch": 0.02837744656022914, - "grad_norm": 11.736847877502441, - "learning_rate": 4.9527042557329516e-05, - "loss": 1.1602, - "step": 3210 - }, - { - "epoch": 0.028465849820541383, - "grad_norm": 6.757185459136963, - "learning_rate": 4.952556916965765e-05, - "loss": 1.1859, - "step": 3220 - }, - { - "epoch": 0.02855425308085362, - "grad_norm": 4.051261901855469, - "learning_rate": 4.952409578198577e-05, - "loss": 1.049, - "step": 3230 - }, - { - "epoch": 0.028642656341165862, - "grad_norm": 3.273878335952759, - "learning_rate": 4.952262239431391e-05, - "loss": 1.0997, - "step": 3240 - }, - { - "epoch": 0.028731059601478103, - "grad_norm": 3.5604453086853027, - "learning_rate": 4.952114900664203e-05, - "loss": 1.1143, - "step": 3250 - }, - { - "epoch": 0.028819462861790344, - "grad_norm": 7.519532203674316, - "learning_rate": 4.9519675618970164e-05, - "loss": 0.9892, - "step": 3260 - }, - { - "epoch": 0.028907866122102582, - "grad_norm": 8.42969799041748, - "learning_rate": 4.951820223129829e-05, - "loss": 1.0713, - "step": 3270 - }, - { - "epoch": 0.028996269382414824, - "grad_norm": 6.64420747756958, - "learning_rate": 4.951672884362642e-05, - "loss": 1.0585, - "step": 3280 - }, - { - "epoch": 0.029084672642727065, - "grad_norm": 5.263432025909424, - "learning_rate": 4.951525545595455e-05, - "loss": 1.1235, - "step": 3290 - }, - { - "epoch": 0.029173075903039303, - "grad_norm": 6.5242509841918945, - "learning_rate": 4.9513782068282684e-05, - "loss": 1.0536, - "step": 3300 - }, - { - "epoch": 0.029261479163351544, - "grad_norm": 5.996762275695801, - "learning_rate": 4.9512308680610806e-05, - "loss": 1.1308, - "step": 3310 - }, - { - "epoch": 0.029349882423663785, - "grad_norm": 7.299069404602051, - "learning_rate": 4.951083529293894e-05, - "loss": 1.0546, - "step": 3320 - }, - { - "epoch": 0.029438285683976027, - "grad_norm": 4.803090572357178, - "learning_rate": 4.950936190526707e-05, - "loss": 1.1245, - "step": 3330 - }, - { - "epoch": 0.029526688944288264, - "grad_norm": 5.4984130859375, - "learning_rate": 4.95078885175952e-05, - "loss": 1.0298, - "step": 3340 - }, - { - "epoch": 0.029615092204600506, - "grad_norm": 4.74207878112793, - "learning_rate": 4.9506415129923326e-05, - "loss": 1.0083, - "step": 3350 - }, - { - "epoch": 0.029703495464912747, - "grad_norm": 7.264033794403076, - "learning_rate": 4.9504941742251455e-05, - "loss": 1.0415, - "step": 3360 - }, - { - "epoch": 0.029791898725224985, - "grad_norm": 11.472503662109375, - "learning_rate": 4.950346835457958e-05, - "loss": 1.0234, - "step": 3370 - }, - { - "epoch": 0.029880301985537226, - "grad_norm": 10.286882400512695, - "learning_rate": 4.950199496690772e-05, - "loss": 1.0993, - "step": 3380 - }, - { - "epoch": 0.029968705245849468, - "grad_norm": 8.054348945617676, - "learning_rate": 4.950052157923584e-05, - "loss": 1.0699, - "step": 3390 - }, - { - "epoch": 0.03005710850616171, - "grad_norm": 4.6194610595703125, - "learning_rate": 4.9499048191563975e-05, - "loss": 1.0353, - "step": 3400 - }, - { - "epoch": 0.030145511766473947, - "grad_norm": 5.325869083404541, - "learning_rate": 4.94975748038921e-05, - "loss": 1.1676, - "step": 3410 - }, - { - "epoch": 0.030233915026786188, - "grad_norm": 3.003830671310425, - "learning_rate": 4.949610141622023e-05, - "loss": 0.9355, - "step": 3420 - }, - { - "epoch": 0.03032231828709843, - "grad_norm": 17.496395111083984, - "learning_rate": 4.949462802854836e-05, - "loss": 1.3197, - "step": 3430 - }, - { - "epoch": 0.030410721547410667, - "grad_norm": 5.781140327453613, - "learning_rate": 4.9493154640876495e-05, - "loss": 1.1219, - "step": 3440 - }, - { - "epoch": 0.03049912480772291, - "grad_norm": 4.405752182006836, - "learning_rate": 4.9491681253204617e-05, - "loss": 0.9957, - "step": 3450 - }, - { - "epoch": 0.03058752806803515, - "grad_norm": 3.774116039276123, - "learning_rate": 4.949020786553275e-05, - "loss": 0.9923, - "step": 3460 - }, - { - "epoch": 0.03067593132834739, - "grad_norm": 6.497739315032959, - "learning_rate": 4.948873447786088e-05, - "loss": 1.1951, - "step": 3470 - }, - { - "epoch": 0.03076433458865963, - "grad_norm": 6.66745662689209, - "learning_rate": 4.948726109018901e-05, - "loss": 1.1229, - "step": 3480 - }, - { - "epoch": 0.03085273784897187, - "grad_norm": 5.97498893737793, - "learning_rate": 4.948578770251714e-05, - "loss": 1.0747, - "step": 3490 - }, - { - "epoch": 0.03094114110928411, - "grad_norm": 9.32170581817627, - "learning_rate": 4.9484314314845265e-05, - "loss": 1.1125, - "step": 3500 - }, - { - "epoch": 0.03102954436959635, - "grad_norm": 5.269107818603516, - "learning_rate": 4.9482840927173393e-05, - "loss": 1.0845, - "step": 3510 - }, - { - "epoch": 0.03111794762990859, - "grad_norm": 5.987974643707275, - "learning_rate": 4.948136753950153e-05, - "loss": 1.0369, - "step": 3520 - }, - { - "epoch": 0.031206350890220832, - "grad_norm": 5.234177112579346, - "learning_rate": 4.947989415182965e-05, - "loss": 1.0749, - "step": 3530 - }, - { - "epoch": 0.03129475415053307, - "grad_norm": 3.885451555252075, - "learning_rate": 4.9478420764157785e-05, - "loss": 1.0154, - "step": 3540 - }, - { - "epoch": 0.03138315741084531, - "grad_norm": 5.198904514312744, - "learning_rate": 4.9476947376485914e-05, - "loss": 1.0572, - "step": 3550 - }, - { - "epoch": 0.031471560671157556, - "grad_norm": 5.708184719085693, - "learning_rate": 4.947547398881404e-05, - "loss": 1.1762, - "step": 3560 - }, - { - "epoch": 0.031559963931469794, - "grad_norm": 4.828928470611572, - "learning_rate": 4.947400060114217e-05, - "loss": 1.0067, - "step": 3570 - }, - { - "epoch": 0.03164836719178203, - "grad_norm": 3.9392752647399902, - "learning_rate": 4.9472527213470305e-05, - "loss": 1.0445, - "step": 3580 - }, - { - "epoch": 0.031736770452094276, - "grad_norm": 3.358604669570923, - "learning_rate": 4.947105382579843e-05, - "loss": 1.1345, - "step": 3590 - }, - { - "epoch": 0.031825173712406514, - "grad_norm": 6.108957290649414, - "learning_rate": 4.946958043812656e-05, - "loss": 1.0802, - "step": 3600 - }, - { - "epoch": 0.03191357697271875, - "grad_norm": 8.162700653076172, - "learning_rate": 4.946810705045469e-05, - "loss": 1.0491, - "step": 3610 - }, - { - "epoch": 0.032001980233031, - "grad_norm": 5.8155341148376465, - "learning_rate": 4.946663366278282e-05, - "loss": 1.1524, - "step": 3620 - }, - { - "epoch": 0.032090383493343234, - "grad_norm": 2.848914861679077, - "learning_rate": 4.946516027511095e-05, - "loss": 1.0649, - "step": 3630 - }, - { - "epoch": 0.03217878675365547, - "grad_norm": 4.426916599273682, - "learning_rate": 4.9463686887439076e-05, - "loss": 0.9712, - "step": 3640 - }, - { - "epoch": 0.03226719001396772, - "grad_norm": 3.7467966079711914, - "learning_rate": 4.9462213499767204e-05, - "loss": 1.1929, - "step": 3650 - }, - { - "epoch": 0.032355593274279955, - "grad_norm": 6.543834686279297, - "learning_rate": 4.946074011209534e-05, - "loss": 1.111, - "step": 3660 - }, - { - "epoch": 0.03244399653459219, - "grad_norm": 3.844190835952759, - "learning_rate": 4.945926672442347e-05, - "loss": 1.1195, - "step": 3670 - }, - { - "epoch": 0.03253239979490444, - "grad_norm": 6.082317352294922, - "learning_rate": 4.9457793336751596e-05, - "loss": 1.1465, - "step": 3680 - }, - { - "epoch": 0.032620803055216675, - "grad_norm": 6.333926200866699, - "learning_rate": 4.9456319949079724e-05, - "loss": 1.0256, - "step": 3690 - }, - { - "epoch": 0.03270920631552892, - "grad_norm": 4.987504959106445, - "learning_rate": 4.945484656140785e-05, - "loss": 1.0487, - "step": 3700 - }, - { - "epoch": 0.03279760957584116, - "grad_norm": 4.357222557067871, - "learning_rate": 4.945337317373598e-05, - "loss": 1.0347, - "step": 3710 - }, - { - "epoch": 0.032886012836153396, - "grad_norm": 2.961578369140625, - "learning_rate": 4.945189978606411e-05, - "loss": 0.9719, - "step": 3720 - }, - { - "epoch": 0.03297441609646564, - "grad_norm": 6.053518295288086, - "learning_rate": 4.9450426398392244e-05, - "loss": 1.2505, - "step": 3730 - }, - { - "epoch": 0.03306281935677788, - "grad_norm": 11.385558128356934, - "learning_rate": 4.944895301072037e-05, - "loss": 1.0115, - "step": 3740 - }, - { - "epoch": 0.033151222617090116, - "grad_norm": 7.48608922958374, - "learning_rate": 4.94474796230485e-05, - "loss": 0.9599, - "step": 3750 - }, - { - "epoch": 0.03323962587740236, - "grad_norm": 8.101400375366211, - "learning_rate": 4.944600623537663e-05, - "loss": 1.2365, - "step": 3760 - }, - { - "epoch": 0.0333280291377146, - "grad_norm": 3.3874876499176025, - "learning_rate": 4.944453284770476e-05, - "loss": 1.0796, - "step": 3770 - }, - { - "epoch": 0.03341643239802684, - "grad_norm": 4.571724891662598, - "learning_rate": 4.9443059460032886e-05, - "loss": 1.0822, - "step": 3780 - }, - { - "epoch": 0.03350483565833908, - "grad_norm": 7.061591625213623, - "learning_rate": 4.944158607236102e-05, - "loss": 0.9967, - "step": 3790 - }, - { - "epoch": 0.03359323891865132, - "grad_norm": 5.082997798919678, - "learning_rate": 4.944011268468915e-05, - "loss": 1.0895, - "step": 3800 - }, - { - "epoch": 0.03368164217896356, - "grad_norm": 3.501892566680908, - "learning_rate": 4.943863929701728e-05, - "loss": 1.1071, - "step": 3810 - }, - { - "epoch": 0.0337700454392758, - "grad_norm": 6.779443264007568, - "learning_rate": 4.9437165909345406e-05, - "loss": 0.994, - "step": 3820 - }, - { - "epoch": 0.03385844869958804, - "grad_norm": 5.151303768157959, - "learning_rate": 4.9435692521673535e-05, - "loss": 1.1656, - "step": 3830 - }, - { - "epoch": 0.033946851959900284, - "grad_norm": 2.304898738861084, - "learning_rate": 4.943421913400166e-05, - "loss": 1.1191, - "step": 3840 - }, - { - "epoch": 0.03403525522021252, - "grad_norm": 4.50697135925293, - "learning_rate": 4.94327457463298e-05, - "loss": 1.032, - "step": 3850 - }, - { - "epoch": 0.03412365848052476, - "grad_norm": 5.010416507720947, - "learning_rate": 4.943127235865792e-05, - "loss": 1.0835, - "step": 3860 - }, - { - "epoch": 0.034212061740837005, - "grad_norm": 6.183775901794434, - "learning_rate": 4.9429798970986055e-05, - "loss": 1.0877, - "step": 3870 - }, - { - "epoch": 0.03430046500114924, - "grad_norm": 7.286951065063477, - "learning_rate": 4.942832558331418e-05, - "loss": 1.1113, - "step": 3880 - }, - { - "epoch": 0.03438886826146148, - "grad_norm": 5.37122106552124, - "learning_rate": 4.942685219564231e-05, - "loss": 1.079, - "step": 3890 - }, - { - "epoch": 0.034477271521773725, - "grad_norm": 5.562955856323242, - "learning_rate": 4.942537880797044e-05, - "loss": 1.1608, - "step": 3900 - }, - { - "epoch": 0.03456567478208596, - "grad_norm": 5.252758026123047, - "learning_rate": 4.9423905420298575e-05, - "loss": 1.0415, - "step": 3910 - }, - { - "epoch": 0.0346540780423982, - "grad_norm": 6.825470924377441, - "learning_rate": 4.94224320326267e-05, - "loss": 1.0164, - "step": 3920 - }, - { - "epoch": 0.034742481302710446, - "grad_norm": 5.437935829162598, - "learning_rate": 4.942095864495483e-05, - "loss": 1.0165, - "step": 3930 - }, - { - "epoch": 0.034830884563022683, - "grad_norm": 6.060318946838379, - "learning_rate": 4.941948525728296e-05, - "loss": 1.1156, - "step": 3940 - }, - { - "epoch": 0.03491928782333492, - "grad_norm": 3.731422185897827, - "learning_rate": 4.941801186961109e-05, - "loss": 0.9086, - "step": 3950 - }, - { - "epoch": 0.035007691083647166, - "grad_norm": 7.884082794189453, - "learning_rate": 4.941653848193922e-05, - "loss": 1.1156, - "step": 3960 - }, - { - "epoch": 0.035096094343959404, - "grad_norm": 6.734913349151611, - "learning_rate": 4.9415065094267345e-05, - "loss": 1.0593, - "step": 3970 - }, - { - "epoch": 0.03518449760427165, - "grad_norm": 9.117546081542969, - "learning_rate": 4.9413591706595474e-05, - "loss": 1.0714, - "step": 3980 - }, - { - "epoch": 0.035272900864583887, - "grad_norm": 16.266523361206055, - "learning_rate": 4.941211831892361e-05, - "loss": 1.1006, - "step": 3990 - }, - { - "epoch": 0.035361304124896124, - "grad_norm": 3.810106039047241, - "learning_rate": 4.941064493125173e-05, - "loss": 0.9989, - "step": 4000 - }, - { - "epoch": 0.03544970738520837, - "grad_norm": 4.392092704772949, - "learning_rate": 4.9409171543579865e-05, - "loss": 1.0222, - "step": 4010 - }, - { - "epoch": 0.03553811064552061, - "grad_norm": 4.306662559509277, - "learning_rate": 4.9407698155907994e-05, - "loss": 1.0154, - "step": 4020 - }, - { - "epoch": 0.035626513905832845, - "grad_norm": 3.4554357528686523, - "learning_rate": 4.940622476823612e-05, - "loss": 1.0859, - "step": 4030 - }, - { - "epoch": 0.03571491716614509, - "grad_norm": 4.0056562423706055, - "learning_rate": 4.940475138056425e-05, - "loss": 1.0923, - "step": 4040 - }, - { - "epoch": 0.03580332042645733, - "grad_norm": 3.4190101623535156, - "learning_rate": 4.9403277992892386e-05, - "loss": 0.9547, - "step": 4050 - }, - { - "epoch": 0.035891723686769565, - "grad_norm": 4.973897457122803, - "learning_rate": 4.940180460522051e-05, - "loss": 1.2254, - "step": 4060 - }, - { - "epoch": 0.03598012694708181, - "grad_norm": 4.1999359130859375, - "learning_rate": 4.940033121754864e-05, - "loss": 0.9756, - "step": 4070 - }, - { - "epoch": 0.03606853020739405, - "grad_norm": 12.506514549255371, - "learning_rate": 4.9398857829876764e-05, - "loss": 0.9271, - "step": 4080 - }, - { - "epoch": 0.036156933467706286, - "grad_norm": 9.009496688842773, - "learning_rate": 4.93973844422049e-05, - "loss": 1.0405, - "step": 4090 - }, - { - "epoch": 0.03624533672801853, - "grad_norm": 4.755258560180664, - "learning_rate": 4.939591105453303e-05, - "loss": 0.9885, - "step": 4100 - }, - { - "epoch": 0.03633373998833077, - "grad_norm": 7.77773904800415, - "learning_rate": 4.9394437666861156e-05, - "loss": 1.1745, - "step": 4110 - }, - { - "epoch": 0.03642214324864301, - "grad_norm": 3.6486005783081055, - "learning_rate": 4.9392964279189284e-05, - "loss": 1.0569, - "step": 4120 - }, - { - "epoch": 0.03651054650895525, - "grad_norm": 6.329847812652588, - "learning_rate": 4.939149089151742e-05, - "loss": 1.0719, - "step": 4130 - }, - { - "epoch": 0.03659894976926749, - "grad_norm": 4.576066493988037, - "learning_rate": 4.939001750384554e-05, - "loss": 1.0469, - "step": 4140 - }, - { - "epoch": 0.03668735302957973, - "grad_norm": 5.984560966491699, - "learning_rate": 4.9388544116173676e-05, - "loss": 1.032, - "step": 4150 - }, - { - "epoch": 0.03677575628989197, - "grad_norm": 8.831100463867188, - "learning_rate": 4.9387070728501804e-05, - "loss": 1.0485, - "step": 4160 - }, - { - "epoch": 0.03686415955020421, - "grad_norm": 3.0822088718414307, - "learning_rate": 4.938559734082993e-05, - "loss": 0.9294, - "step": 4170 - }, - { - "epoch": 0.036952562810516454, - "grad_norm": 6.145000457763672, - "learning_rate": 4.938412395315806e-05, - "loss": 1.1835, - "step": 4180 - }, - { - "epoch": 0.03704096607082869, - "grad_norm": 3.957902193069458, - "learning_rate": 4.938265056548619e-05, - "loss": 1.0971, - "step": 4190 - }, - { - "epoch": 0.03712936933114093, - "grad_norm": 6.664913654327393, - "learning_rate": 4.938117717781432e-05, - "loss": 0.9565, - "step": 4200 - }, - { - "epoch": 0.037217772591453174, - "grad_norm": 4.730056285858154, - "learning_rate": 4.937970379014245e-05, - "loss": 1.1818, - "step": 4210 - }, - { - "epoch": 0.03730617585176541, - "grad_norm": 3.4853060245513916, - "learning_rate": 4.9378230402470574e-05, - "loss": 1.108, - "step": 4220 - }, - { - "epoch": 0.03739457911207766, - "grad_norm": 5.281261920928955, - "learning_rate": 4.937675701479871e-05, - "loss": 1.0069, - "step": 4230 - }, - { - "epoch": 0.037482982372389895, - "grad_norm": 3.6927974224090576, - "learning_rate": 4.937528362712684e-05, - "loss": 1.0068, - "step": 4240 - }, - { - "epoch": 0.03757138563270213, - "grad_norm": 5.452990531921387, - "learning_rate": 4.9373810239454966e-05, - "loss": 1.0916, - "step": 4250 - }, - { - "epoch": 0.03765978889301438, - "grad_norm": 6.807621479034424, - "learning_rate": 4.9372336851783095e-05, - "loss": 0.9799, - "step": 4260 - }, - { - "epoch": 0.037748192153326615, - "grad_norm": 6.8397393226623535, - "learning_rate": 4.937086346411123e-05, - "loss": 1.0614, - "step": 4270 - }, - { - "epoch": 0.03783659541363885, - "grad_norm": 3.4389631748199463, - "learning_rate": 4.936939007643935e-05, - "loss": 1.0625, - "step": 4280 - }, - { - "epoch": 0.0379249986739511, - "grad_norm": 6.080694675445557, - "learning_rate": 4.9367916688767486e-05, - "loss": 1.1324, - "step": 4290 - }, - { - "epoch": 0.038013401934263336, - "grad_norm": 3.531956911087036, - "learning_rate": 4.936644330109561e-05, - "loss": 1.0415, - "step": 4300 - }, - { - "epoch": 0.03810180519457557, - "grad_norm": 6.8647894859313965, - "learning_rate": 4.936496991342374e-05, - "loss": 1.0879, - "step": 4310 - }, - { - "epoch": 0.03819020845488782, - "grad_norm": 5.773015975952148, - "learning_rate": 4.936349652575187e-05, - "loss": 1.0298, - "step": 4320 - }, - { - "epoch": 0.038278611715200056, - "grad_norm": 11.786985397338867, - "learning_rate": 4.936202313808e-05, - "loss": 1.0216, - "step": 4330 - }, - { - "epoch": 0.038367014975512294, - "grad_norm": 9.405512809753418, - "learning_rate": 4.936054975040813e-05, - "loss": 0.9559, - "step": 4340 - }, - { - "epoch": 0.03845541823582454, - "grad_norm": 5.85341739654541, - "learning_rate": 4.935907636273626e-05, - "loss": 1.1283, - "step": 4350 - }, - { - "epoch": 0.038543821496136776, - "grad_norm": 3.5939722061157227, - "learning_rate": 4.9357602975064385e-05, - "loss": 1.0697, - "step": 4360 - }, - { - "epoch": 0.03863222475644902, - "grad_norm": 4.775249481201172, - "learning_rate": 4.935612958739252e-05, - "loss": 0.9419, - "step": 4370 - }, - { - "epoch": 0.03872062801676126, - "grad_norm": 3.026939868927002, - "learning_rate": 4.935465619972065e-05, - "loss": 1.0233, - "step": 4380 - }, - { - "epoch": 0.0388090312770735, - "grad_norm": 3.604031562805176, - "learning_rate": 4.935318281204878e-05, - "loss": 1.1227, - "step": 4390 - }, - { - "epoch": 0.03889743453738574, - "grad_norm": 5.833409786224365, - "learning_rate": 4.9351709424376905e-05, - "loss": 1.0837, - "step": 4400 - }, - { - "epoch": 0.03898583779769798, - "grad_norm": 8.4013090133667, - "learning_rate": 4.935023603670504e-05, - "loss": 1.074, - "step": 4410 - }, - { - "epoch": 0.03907424105801022, - "grad_norm": 8.928999900817871, - "learning_rate": 4.934876264903316e-05, - "loss": 1.0495, - "step": 4420 - }, - { - "epoch": 0.03916264431832246, - "grad_norm": 6.564711570739746, - "learning_rate": 4.93472892613613e-05, - "loss": 1.0646, - "step": 4430 - }, - { - "epoch": 0.0392510475786347, - "grad_norm": 4.471921920776367, - "learning_rate": 4.934581587368942e-05, - "loss": 1.1081, - "step": 4440 - }, - { - "epoch": 0.03933945083894694, - "grad_norm": 5.407211780548096, - "learning_rate": 4.9344342486017554e-05, - "loss": 1.1002, - "step": 4450 - }, - { - "epoch": 0.03942785409925918, - "grad_norm": 3.1841092109680176, - "learning_rate": 4.934286909834568e-05, - "loss": 1.0101, - "step": 4460 - }, - { - "epoch": 0.03951625735957142, - "grad_norm": 6.48630952835083, - "learning_rate": 4.934139571067381e-05, - "loss": 0.9795, - "step": 4470 - }, - { - "epoch": 0.03960466061988366, - "grad_norm": 6.4455389976501465, - "learning_rate": 4.933992232300194e-05, - "loss": 0.9063, - "step": 4480 - }, - { - "epoch": 0.0396930638801959, - "grad_norm": 6.892740249633789, - "learning_rate": 4.9338448935330074e-05, - "loss": 1.1026, - "step": 4490 - }, - { - "epoch": 0.03978146714050814, - "grad_norm": 6.074153423309326, - "learning_rate": 4.9336975547658195e-05, - "loss": 1.0885, - "step": 4500 - }, - { - "epoch": 0.039869870400820386, - "grad_norm": 6.199062824249268, - "learning_rate": 4.933550215998633e-05, - "loss": 1.0653, - "step": 4510 - }, - { - "epoch": 0.03995827366113262, - "grad_norm": 4.292714595794678, - "learning_rate": 4.933402877231446e-05, - "loss": 1.0616, - "step": 4520 - }, - { - "epoch": 0.04004667692144486, - "grad_norm": 5.541876792907715, - "learning_rate": 4.933255538464259e-05, - "loss": 1.045, - "step": 4530 - }, - { - "epoch": 0.040135080181757106, - "grad_norm": 5.079677581787109, - "learning_rate": 4.9331081996970716e-05, - "loss": 0.9155, - "step": 4540 - }, - { - "epoch": 0.040223483442069344, - "grad_norm": 4.602534770965576, - "learning_rate": 4.9329608609298844e-05, - "loss": 1.0384, - "step": 4550 - }, - { - "epoch": 0.04031188670238158, - "grad_norm": 5.581985950469971, - "learning_rate": 4.932813522162697e-05, - "loss": 1.0825, - "step": 4560 - }, - { - "epoch": 0.040400289962693826, - "grad_norm": 6.3072896003723145, - "learning_rate": 4.932666183395511e-05, - "loss": 1.0934, - "step": 4570 - }, - { - "epoch": 0.040488693223006064, - "grad_norm": 4.629852294921875, - "learning_rate": 4.9325188446283236e-05, - "loss": 1.0087, - "step": 4580 - }, - { - "epoch": 0.0405770964833183, - "grad_norm": 3.7570137977600098, - "learning_rate": 4.9323715058611364e-05, - "loss": 1.0211, - "step": 4590 - }, - { - "epoch": 0.04066549974363055, - "grad_norm": 9.198297500610352, - "learning_rate": 4.932224167093949e-05, - "loss": 0.9675, - "step": 4600 - }, - { - "epoch": 0.040753903003942785, - "grad_norm": 5.447032928466797, - "learning_rate": 4.932076828326762e-05, - "loss": 1.0059, - "step": 4610 - }, - { - "epoch": 0.04084230626425502, - "grad_norm": 6.04211950302124, - "learning_rate": 4.931929489559575e-05, - "loss": 1.0281, - "step": 4620 - }, - { - "epoch": 0.04093070952456727, - "grad_norm": 8.443472862243652, - "learning_rate": 4.9317821507923884e-05, - "loss": 1.0628, - "step": 4630 - }, - { - "epoch": 0.041019112784879505, - "grad_norm": 4.586818695068359, - "learning_rate": 4.931634812025201e-05, - "loss": 0.9729, - "step": 4640 - }, - { - "epoch": 0.04110751604519175, - "grad_norm": 5.2301344871521, - "learning_rate": 4.931487473258014e-05, - "loss": 1.0448, - "step": 4650 - }, - { - "epoch": 0.04119591930550399, - "grad_norm": 5.564103603363037, - "learning_rate": 4.931340134490827e-05, - "loss": 1.12, - "step": 4660 - }, - { - "epoch": 0.041284322565816226, - "grad_norm": 4.086526393890381, - "learning_rate": 4.93119279572364e-05, - "loss": 1.0465, - "step": 4670 - }, - { - "epoch": 0.04137272582612847, - "grad_norm": 5.185882091522217, - "learning_rate": 4.9310454569564526e-05, - "loss": 0.9242, - "step": 4680 - }, - { - "epoch": 0.04146112908644071, - "grad_norm": 8.439657211303711, - "learning_rate": 4.9308981181892654e-05, - "loss": 0.9688, - "step": 4690 - }, - { - "epoch": 0.041549532346752946, - "grad_norm": 4.6650800704956055, - "learning_rate": 4.930750779422079e-05, - "loss": 1.0498, - "step": 4700 - }, - { - "epoch": 0.04163793560706519, - "grad_norm": 5.943160057067871, - "learning_rate": 4.930603440654892e-05, - "loss": 1.1057, - "step": 4710 - }, - { - "epoch": 0.04172633886737743, - "grad_norm": 4.689329624176025, - "learning_rate": 4.9304561018877046e-05, - "loss": 1.0006, - "step": 4720 - }, - { - "epoch": 0.041814742127689666, - "grad_norm": 8.358399391174316, - "learning_rate": 4.9303087631205175e-05, - "loss": 1.0855, - "step": 4730 - }, - { - "epoch": 0.04190314538800191, - "grad_norm": 4.517096519470215, - "learning_rate": 4.93016142435333e-05, - "loss": 0.9695, - "step": 4740 - }, - { - "epoch": 0.04199154864831415, - "grad_norm": 6.596667289733887, - "learning_rate": 4.930014085586143e-05, - "loss": 1.0451, - "step": 4750 - }, - { - "epoch": 0.04207995190862639, - "grad_norm": 4.656362056732178, - "learning_rate": 4.9298667468189566e-05, - "loss": 0.9147, - "step": 4760 - }, - { - "epoch": 0.04216835516893863, - "grad_norm": 11.312739372253418, - "learning_rate": 4.9297194080517695e-05, - "loss": 0.9862, - "step": 4770 - }, - { - "epoch": 0.04225675842925087, - "grad_norm": 4.505188465118408, - "learning_rate": 4.929572069284582e-05, - "loss": 1.0174, - "step": 4780 - }, - { - "epoch": 0.042345161689563114, - "grad_norm": 4.732143878936768, - "learning_rate": 4.929424730517395e-05, - "loss": 0.9307, - "step": 4790 - }, - { - "epoch": 0.04243356494987535, - "grad_norm": 4.13836669921875, - "learning_rate": 4.929277391750208e-05, - "loss": 0.9735, - "step": 4800 - }, - { - "epoch": 0.04252196821018759, - "grad_norm": 5.821000099182129, - "learning_rate": 4.929130052983021e-05, - "loss": 0.9195, - "step": 4810 - }, - { - "epoch": 0.042610371470499835, - "grad_norm": 3.0030617713928223, - "learning_rate": 4.928982714215834e-05, - "loss": 0.9304, - "step": 4820 - }, - { - "epoch": 0.04269877473081207, - "grad_norm": 3.2358474731445312, - "learning_rate": 4.9288353754486465e-05, - "loss": 1.1401, - "step": 4830 - }, - { - "epoch": 0.04278717799112431, - "grad_norm": 4.475287914276123, - "learning_rate": 4.92868803668146e-05, - "loss": 0.9251, - "step": 4840 - }, - { - "epoch": 0.042875581251436555, - "grad_norm": 5.249177932739258, - "learning_rate": 4.928540697914273e-05, - "loss": 1.0662, - "step": 4850 - }, - { - "epoch": 0.04296398451174879, - "grad_norm": 6.232396602630615, - "learning_rate": 4.928393359147086e-05, - "loss": 1.0454, - "step": 4860 - }, - { - "epoch": 0.04305238777206103, - "grad_norm": 3.666123390197754, - "learning_rate": 4.9282460203798985e-05, - "loss": 1.0652, - "step": 4870 - }, - { - "epoch": 0.043140791032373275, - "grad_norm": 6.440056324005127, - "learning_rate": 4.928098681612712e-05, - "loss": 1.0286, - "step": 4880 - }, - { - "epoch": 0.04322919429268551, - "grad_norm": 4.403159141540527, - "learning_rate": 4.927951342845524e-05, - "loss": 1.0054, - "step": 4890 - }, - { - "epoch": 0.04331759755299775, - "grad_norm": 5.795849800109863, - "learning_rate": 4.927804004078338e-05, - "loss": 1.0152, - "step": 4900 - }, - { - "epoch": 0.043406000813309996, - "grad_norm": 12.21654987335205, - "learning_rate": 4.92765666531115e-05, - "loss": 0.9303, - "step": 4910 - }, - { - "epoch": 0.043494404073622234, - "grad_norm": 4.522624969482422, - "learning_rate": 4.9275093265439634e-05, - "loss": 1.0237, - "step": 4920 - }, - { - "epoch": 0.04358280733393448, - "grad_norm": 7.325983047485352, - "learning_rate": 4.927361987776776e-05, - "loss": 0.9337, - "step": 4930 - }, - { - "epoch": 0.043671210594246716, - "grad_norm": 11.711366653442383, - "learning_rate": 4.927214649009589e-05, - "loss": 1.0331, - "step": 4940 - }, - { - "epoch": 0.043759613854558954, - "grad_norm": 7.563577651977539, - "learning_rate": 4.927067310242402e-05, - "loss": 1.0056, - "step": 4950 - }, - { - "epoch": 0.0438480171148712, - "grad_norm": 3.4880125522613525, - "learning_rate": 4.9269199714752154e-05, - "loss": 0.8889, - "step": 4960 - }, - { - "epoch": 0.04393642037518344, - "grad_norm": 8.676736831665039, - "learning_rate": 4.9267726327080275e-05, - "loss": 1.2163, - "step": 4970 - }, - { - "epoch": 0.044024823635495675, - "grad_norm": 6.389759540557861, - "learning_rate": 4.926625293940841e-05, - "loss": 1.0256, - "step": 4980 - }, - { - "epoch": 0.04411322689580792, - "grad_norm": 5.722421646118164, - "learning_rate": 4.926477955173654e-05, - "loss": 0.9956, - "step": 4990 - }, - { - "epoch": 0.04420163015612016, - "grad_norm": 3.8869667053222656, - "learning_rate": 4.926330616406467e-05, - "loss": 0.924, - "step": 5000 - }, - { - "epoch": 0.044290033416432395, - "grad_norm": 9.306219100952148, - "learning_rate": 4.9261832776392796e-05, - "loss": 1.1055, - "step": 5010 - }, - { - "epoch": 0.04437843667674464, - "grad_norm": 5.7872633934021, - "learning_rate": 4.9260359388720924e-05, - "loss": 0.9434, - "step": 5020 - }, - { - "epoch": 0.04446683993705688, - "grad_norm": 2.841301679611206, - "learning_rate": 4.925888600104905e-05, - "loss": 0.9664, - "step": 5030 - }, - { - "epoch": 0.04455524319736912, - "grad_norm": 5.8848748207092285, - "learning_rate": 4.925741261337719e-05, - "loss": 0.9785, - "step": 5040 - }, - { - "epoch": 0.04464364645768136, - "grad_norm": 5.913470268249512, - "learning_rate": 4.925593922570531e-05, - "loss": 0.9967, - "step": 5050 - }, - { - "epoch": 0.0447320497179936, - "grad_norm": 5.830585479736328, - "learning_rate": 4.9254465838033444e-05, - "loss": 1.0564, - "step": 5060 - }, - { - "epoch": 0.04482045297830584, - "grad_norm": 3.5792245864868164, - "learning_rate": 4.925299245036157e-05, - "loss": 1.2437, - "step": 5070 - }, - { - "epoch": 0.04490885623861808, - "grad_norm": 5.205353736877441, - "learning_rate": 4.92515190626897e-05, - "loss": 1.2016, - "step": 5080 - }, - { - "epoch": 0.04499725949893032, - "grad_norm": 4.309473514556885, - "learning_rate": 4.925004567501783e-05, - "loss": 0.931, - "step": 5090 - }, - { - "epoch": 0.04508566275924256, - "grad_norm": 7.3599982261657715, - "learning_rate": 4.9248572287345964e-05, - "loss": 0.9347, - "step": 5100 - }, - { - "epoch": 0.0451740660195548, - "grad_norm": 4.055037021636963, - "learning_rate": 4.9247098899674086e-05, - "loss": 0.9706, - "step": 5110 - }, - { - "epoch": 0.04526246927986704, - "grad_norm": 7.1950578689575195, - "learning_rate": 4.924562551200222e-05, - "loss": 0.9902, - "step": 5120 - }, - { - "epoch": 0.045350872540179284, - "grad_norm": 3.477550506591797, - "learning_rate": 4.924415212433034e-05, - "loss": 1.1443, - "step": 5130 - }, - { - "epoch": 0.04543927580049152, - "grad_norm": 6.067864418029785, - "learning_rate": 4.924267873665848e-05, - "loss": 0.9565, - "step": 5140 - }, - { - "epoch": 0.04552767906080376, - "grad_norm": 4.849110126495361, - "learning_rate": 4.9241205348986606e-05, - "loss": 0.9515, - "step": 5150 - }, - { - "epoch": 0.045616082321116004, - "grad_norm": 6.101007461547852, - "learning_rate": 4.9239731961314734e-05, - "loss": 0.927, - "step": 5160 - }, - { - "epoch": 0.04570448558142824, - "grad_norm": 9.797233581542969, - "learning_rate": 4.923825857364286e-05, - "loss": 1.0617, - "step": 5170 - }, - { - "epoch": 0.04579288884174049, - "grad_norm": 6.435054302215576, - "learning_rate": 4.9236785185971e-05, - "loss": 0.9908, - "step": 5180 - }, - { - "epoch": 0.045881292102052725, - "grad_norm": 6.170694828033447, - "learning_rate": 4.923531179829912e-05, - "loss": 1.0215, - "step": 5190 - }, - { - "epoch": 0.04596969536236496, - "grad_norm": 5.148831844329834, - "learning_rate": 4.9233838410627255e-05, - "loss": 1.0137, - "step": 5200 - }, - { - "epoch": 0.04605809862267721, - "grad_norm": 5.061634540557861, - "learning_rate": 4.923236502295538e-05, - "loss": 0.9616, - "step": 5210 - }, - { - "epoch": 0.046146501882989445, - "grad_norm": 6.394944190979004, - "learning_rate": 4.923089163528351e-05, - "loss": 1.0306, - "step": 5220 - }, - { - "epoch": 0.04623490514330168, - "grad_norm": 3.3894765377044678, - "learning_rate": 4.922941824761164e-05, - "loss": 1.112, - "step": 5230 - }, - { - "epoch": 0.04632330840361393, - "grad_norm": 2.86142635345459, - "learning_rate": 4.9227944859939775e-05, - "loss": 0.9926, - "step": 5240 - }, - { - "epoch": 0.046411711663926165, - "grad_norm": 5.8516693115234375, - "learning_rate": 4.9226471472267896e-05, - "loss": 0.8999, - "step": 5250 - }, - { - "epoch": 0.0465001149242384, - "grad_norm": 5.580111503601074, - "learning_rate": 4.922499808459603e-05, - "loss": 0.9504, - "step": 5260 - }, - { - "epoch": 0.04658851818455065, - "grad_norm": 8.920647621154785, - "learning_rate": 4.922352469692415e-05, - "loss": 1.0487, - "step": 5270 - }, - { - "epoch": 0.046676921444862886, - "grad_norm": 6.242912769317627, - "learning_rate": 4.922205130925229e-05, - "loss": 1.0432, - "step": 5280 - }, - { - "epoch": 0.046765324705175124, - "grad_norm": 7.302402496337891, - "learning_rate": 4.922057792158042e-05, - "loss": 1.1208, - "step": 5290 - }, - { - "epoch": 0.04685372796548737, - "grad_norm": 9.732843399047852, - "learning_rate": 4.9219104533908545e-05, - "loss": 0.9864, - "step": 5300 - }, - { - "epoch": 0.046942131225799606, - "grad_norm": 4.585763931274414, - "learning_rate": 4.921763114623667e-05, - "loss": 1.0379, - "step": 5310 - }, - { - "epoch": 0.04703053448611185, - "grad_norm": 5.401805400848389, - "learning_rate": 4.921615775856481e-05, - "loss": 0.9634, - "step": 5320 - }, - { - "epoch": 0.04711893774642409, - "grad_norm": 6.35153865814209, - "learning_rate": 4.921468437089293e-05, - "loss": 1.0483, - "step": 5330 - }, - { - "epoch": 0.04720734100673633, - "grad_norm": 5.659788131713867, - "learning_rate": 4.9213210983221065e-05, - "loss": 1.0511, - "step": 5340 - }, - { - "epoch": 0.04729574426704857, - "grad_norm": 4.884911060333252, - "learning_rate": 4.9211737595549194e-05, - "loss": 1.0186, - "step": 5350 - }, - { - "epoch": 0.04738414752736081, - "grad_norm": 4.303860187530518, - "learning_rate": 4.921026420787732e-05, - "loss": 0.9606, - "step": 5360 - }, - { - "epoch": 0.04747255078767305, - "grad_norm": 10.028541564941406, - "learning_rate": 4.920879082020545e-05, - "loss": 1.1099, - "step": 5370 - }, - { - "epoch": 0.04756095404798529, - "grad_norm": 6.026510715484619, - "learning_rate": 4.920731743253358e-05, - "loss": 1.0223, - "step": 5380 - }, - { - "epoch": 0.04764935730829753, - "grad_norm": 3.0914306640625, - "learning_rate": 4.920584404486171e-05, - "loss": 0.9467, - "step": 5390 - }, - { - "epoch": 0.04773776056860977, - "grad_norm": 6.44119119644165, - "learning_rate": 4.920437065718984e-05, - "loss": 0.9499, - "step": 5400 - }, - { - "epoch": 0.04782616382892201, - "grad_norm": 3.9868052005767822, - "learning_rate": 4.9202897269517964e-05, - "loss": 0.987, - "step": 5410 - }, - { - "epoch": 0.04791456708923425, - "grad_norm": 7.742058753967285, - "learning_rate": 4.92014238818461e-05, - "loss": 1.0558, - "step": 5420 - }, - { - "epoch": 0.04800297034954649, - "grad_norm": 7.036317825317383, - "learning_rate": 4.919995049417423e-05, - "loss": 0.9177, - "step": 5430 - }, - { - "epoch": 0.04809137360985873, - "grad_norm": 9.096891403198242, - "learning_rate": 4.9198477106502355e-05, - "loss": 0.9944, - "step": 5440 - }, - { - "epoch": 0.04817977687017097, - "grad_norm": 6.504842758178711, - "learning_rate": 4.9197003718830484e-05, - "loss": 1.0012, - "step": 5450 - }, - { - "epoch": 0.048268180130483215, - "grad_norm": 3.3751108646392822, - "learning_rate": 4.919553033115862e-05, - "loss": 1.0993, - "step": 5460 - }, - { - "epoch": 0.04835658339079545, - "grad_norm": 3.9735724925994873, - "learning_rate": 4.919405694348674e-05, - "loss": 0.9421, - "step": 5470 - }, - { - "epoch": 0.04844498665110769, - "grad_norm": 7.27800178527832, - "learning_rate": 4.9192583555814876e-05, - "loss": 1.0485, - "step": 5480 - }, - { - "epoch": 0.048533389911419936, - "grad_norm": 6.35645055770874, - "learning_rate": 4.9191110168143004e-05, - "loss": 0.977, - "step": 5490 - }, - { - "epoch": 0.048621793171732174, - "grad_norm": 5.093990325927734, - "learning_rate": 4.918963678047113e-05, - "loss": 1.1082, - "step": 5500 - }, - { - "epoch": 0.04871019643204441, - "grad_norm": 7.979743957519531, - "learning_rate": 4.918816339279926e-05, - "loss": 0.9865, - "step": 5510 - }, - { - "epoch": 0.048798599692356656, - "grad_norm": 6.810271739959717, - "learning_rate": 4.918669000512739e-05, - "loss": 1.0031, - "step": 5520 - }, - { - "epoch": 0.048887002952668894, - "grad_norm": 3.874130964279175, - "learning_rate": 4.918521661745552e-05, - "loss": 0.9567, - "step": 5530 - }, - { - "epoch": 0.04897540621298113, - "grad_norm": 4.50253963470459, - "learning_rate": 4.918374322978365e-05, - "loss": 0.9467, - "step": 5540 - }, - { - "epoch": 0.04906380947329338, - "grad_norm": 6.759551048278809, - "learning_rate": 4.918226984211178e-05, - "loss": 0.9026, - "step": 5550 - }, - { - "epoch": 0.049152212733605614, - "grad_norm": 8.089125633239746, - "learning_rate": 4.918079645443991e-05, - "loss": 0.9087, - "step": 5560 - }, - { - "epoch": 0.04924061599391785, - "grad_norm": 5.775022029876709, - "learning_rate": 4.917932306676804e-05, - "loss": 0.9098, - "step": 5570 - }, - { - "epoch": 0.0493290192542301, - "grad_norm": 3.289224624633789, - "learning_rate": 4.9177849679096166e-05, - "loss": 0.9084, - "step": 5580 - }, - { - "epoch": 0.049417422514542335, - "grad_norm": 5.4761962890625, - "learning_rate": 4.9176376291424294e-05, - "loss": 0.9332, - "step": 5590 - }, - { - "epoch": 0.04950582577485458, - "grad_norm": 8.073454856872559, - "learning_rate": 4.917490290375242e-05, - "loss": 1.0176, - "step": 5600 - }, - { - "epoch": 0.04959422903516682, - "grad_norm": 8.242021560668945, - "learning_rate": 4.917342951608056e-05, - "loss": 1.0431, - "step": 5610 - }, - { - "epoch": 0.049682632295479055, - "grad_norm": 5.374790191650391, - "learning_rate": 4.9171956128408686e-05, - "loss": 0.9881, - "step": 5620 - }, - { - "epoch": 0.0497710355557913, - "grad_norm": 5.706557273864746, - "learning_rate": 4.9170482740736815e-05, - "loss": 0.9989, - "step": 5630 - }, - { - "epoch": 0.04985943881610354, - "grad_norm": 6.9351654052734375, - "learning_rate": 4.916900935306494e-05, - "loss": 0.9272, - "step": 5640 - }, - { - "epoch": 0.049947842076415776, - "grad_norm": 3.7204599380493164, - "learning_rate": 4.916753596539307e-05, - "loss": 0.9965, - "step": 5650 - }, - { - "epoch": 0.05003624533672802, - "grad_norm": 2.9318227767944336, - "learning_rate": 4.91660625777212e-05, - "loss": 0.936, - "step": 5660 - }, - { - "epoch": 0.05012464859704026, - "grad_norm": 8.022659301757812, - "learning_rate": 4.9164589190049335e-05, - "loss": 1.0342, - "step": 5670 - }, - { - "epoch": 0.050213051857352496, - "grad_norm": 5.471922397613525, - "learning_rate": 4.916311580237746e-05, - "loss": 1.0371, - "step": 5680 - }, - { - "epoch": 0.05030145511766474, - "grad_norm": 8.396404266357422, - "learning_rate": 4.916164241470559e-05, - "loss": 0.9629, - "step": 5690 - }, - { - "epoch": 0.05038985837797698, - "grad_norm": 10.464508056640625, - "learning_rate": 4.916016902703372e-05, - "loss": 0.9274, - "step": 5700 - }, - { - "epoch": 0.050478261638289224, - "grad_norm": 6.246397972106934, - "learning_rate": 4.915869563936185e-05, - "loss": 1.1343, - "step": 5710 - }, - { - "epoch": 0.05056666489860146, - "grad_norm": 8.593369483947754, - "learning_rate": 4.9157222251689977e-05, - "loss": 0.9722, - "step": 5720 - }, - { - "epoch": 0.0506550681589137, - "grad_norm": 12.5711030960083, - "learning_rate": 4.915574886401811e-05, - "loss": 0.9718, - "step": 5730 - }, - { - "epoch": 0.050743471419225944, - "grad_norm": 4.524014949798584, - "learning_rate": 4.915427547634623e-05, - "loss": 0.9674, - "step": 5740 - }, - { - "epoch": 0.05083187467953818, - "grad_norm": 4.370037078857422, - "learning_rate": 4.915280208867437e-05, - "loss": 1.1288, - "step": 5750 - }, - { - "epoch": 0.05092027793985042, - "grad_norm": 3.817610740661621, - "learning_rate": 4.91513287010025e-05, - "loss": 0.9584, - "step": 5760 - }, - { - "epoch": 0.051008681200162664, - "grad_norm": 5.315976619720459, - "learning_rate": 4.9149855313330625e-05, - "loss": 0.995, - "step": 5770 - }, - { - "epoch": 0.0510970844604749, - "grad_norm": 4.982341289520264, - "learning_rate": 4.9148381925658753e-05, - "loss": 0.9847, - "step": 5780 - }, - { - "epoch": 0.05118548772078714, - "grad_norm": 7.3806586265563965, - "learning_rate": 4.914690853798689e-05, - "loss": 0.8998, - "step": 5790 - }, - { - "epoch": 0.051273890981099385, - "grad_norm": 6.027826309204102, - "learning_rate": 4.914543515031501e-05, - "loss": 1.0207, - "step": 5800 - }, - { - "epoch": 0.05136229424141162, - "grad_norm": 4.423072814941406, - "learning_rate": 4.9143961762643145e-05, - "loss": 0.9825, - "step": 5810 - }, - { - "epoch": 0.05145069750172386, - "grad_norm": 9.734428405761719, - "learning_rate": 4.9142488374971274e-05, - "loss": 1.0604, - "step": 5820 - }, - { - "epoch": 0.051539100762036105, - "grad_norm": 5.924389362335205, - "learning_rate": 4.91410149872994e-05, - "loss": 1.022, - "step": 5830 - }, - { - "epoch": 0.05162750402234834, - "grad_norm": 9.964608192443848, - "learning_rate": 4.913954159962753e-05, - "loss": 1.0702, - "step": 5840 - }, - { - "epoch": 0.05171590728266059, - "grad_norm": 5.59250545501709, - "learning_rate": 4.913806821195566e-05, - "loss": 1.0136, - "step": 5850 - }, - { - "epoch": 0.051804310542972826, - "grad_norm": 5.76235294342041, - "learning_rate": 4.913659482428379e-05, - "loss": 0.9, - "step": 5860 - }, - { - "epoch": 0.051892713803285064, - "grad_norm": 5.628695964813232, - "learning_rate": 4.913512143661192e-05, - "loss": 1.0229, - "step": 5870 - }, - { - "epoch": 0.05198111706359731, - "grad_norm": 5.529850959777832, - "learning_rate": 4.9133648048940044e-05, - "loss": 0.8358, - "step": 5880 - }, - { - "epoch": 0.052069520323909546, - "grad_norm": 6.741031169891357, - "learning_rate": 4.913217466126818e-05, - "loss": 0.8761, - "step": 5890 - }, - { - "epoch": 0.052157923584221784, - "grad_norm": 4.22281551361084, - "learning_rate": 4.913070127359631e-05, - "loss": 1.1577, - "step": 5900 - }, - { - "epoch": 0.05224632684453403, - "grad_norm": 4.882099151611328, - "learning_rate": 4.9129227885924436e-05, - "loss": 0.7943, - "step": 5910 - }, - { - "epoch": 0.052334730104846267, - "grad_norm": 7.837223529815674, - "learning_rate": 4.9127754498252564e-05, - "loss": 0.8536, - "step": 5920 - }, - { - "epoch": 0.052423133365158504, - "grad_norm": 5.219480514526367, - "learning_rate": 4.91262811105807e-05, - "loss": 1.0275, - "step": 5930 - }, - { - "epoch": 0.05251153662547075, - "grad_norm": 4.461834907531738, - "learning_rate": 4.912480772290882e-05, - "loss": 0.8585, - "step": 5940 - }, - { - "epoch": 0.05259993988578299, - "grad_norm": 4.6096649169921875, - "learning_rate": 4.9123334335236956e-05, - "loss": 0.9949, - "step": 5950 - }, - { - "epoch": 0.052688343146095225, - "grad_norm": 6.320931911468506, - "learning_rate": 4.912186094756508e-05, - "loss": 0.888, - "step": 5960 - }, - { - "epoch": 0.05277674640640747, - "grad_norm": 6.780819892883301, - "learning_rate": 4.912038755989321e-05, - "loss": 0.9352, - "step": 5970 - }, - { - "epoch": 0.05286514966671971, - "grad_norm": 5.738193988800049, - "learning_rate": 4.911891417222134e-05, - "loss": 0.8641, - "step": 5980 - }, - { - "epoch": 0.05295355292703195, - "grad_norm": 4.92494010925293, - "learning_rate": 4.911744078454947e-05, - "loss": 0.9729, - "step": 5990 - }, - { - "epoch": 0.05304195618734419, - "grad_norm": 5.132249355316162, - "learning_rate": 4.91159673968776e-05, - "loss": 0.9417, - "step": 6000 - }, - { - "epoch": 0.05313035944765643, - "grad_norm": 6.281084060668945, - "learning_rate": 4.911449400920573e-05, - "loss": 0.9744, - "step": 6010 - }, - { - "epoch": 0.05321876270796867, - "grad_norm": 2.8937885761260986, - "learning_rate": 4.9113020621533854e-05, - "loss": 0.9816, - "step": 6020 - }, - { - "epoch": 0.05330716596828091, - "grad_norm": 5.416077136993408, - "learning_rate": 4.911154723386199e-05, - "loss": 0.9954, - "step": 6030 - }, - { - "epoch": 0.05339556922859315, - "grad_norm": 4.114582538604736, - "learning_rate": 4.911007384619012e-05, - "loss": 0.9122, - "step": 6040 - }, - { - "epoch": 0.05348397248890539, - "grad_norm": 8.810036659240723, - "learning_rate": 4.9108600458518246e-05, - "loss": 1.0605, - "step": 6050 - }, - { - "epoch": 0.05357237574921763, - "grad_norm": 10.188939094543457, - "learning_rate": 4.9107127070846374e-05, - "loss": 0.979, - "step": 6060 - }, - { - "epoch": 0.05366077900952987, - "grad_norm": 7.3219428062438965, - "learning_rate": 4.91056536831745e-05, - "loss": 1.0042, - "step": 6070 - }, - { - "epoch": 0.05374918226984211, - "grad_norm": 4.038445949554443, - "learning_rate": 4.910418029550263e-05, - "loss": 1.1042, - "step": 6080 - }, - { - "epoch": 0.05383758553015435, - "grad_norm": 4.296999454498291, - "learning_rate": 4.9102706907830766e-05, - "loss": 0.869, - "step": 6090 - }, - { - "epoch": 0.05392598879046659, - "grad_norm": 10.159406661987305, - "learning_rate": 4.910123352015889e-05, - "loss": 0.9012, - "step": 6100 - }, - { - "epoch": 0.054014392050778834, - "grad_norm": 5.289685249328613, - "learning_rate": 4.909976013248702e-05, - "loss": 0.8925, - "step": 6110 - }, - { - "epoch": 0.05410279531109107, - "grad_norm": 5.531337261199951, - "learning_rate": 4.909828674481515e-05, - "loss": 1.1365, - "step": 6120 - }, - { - "epoch": 0.054191198571403316, - "grad_norm": 6.189535617828369, - "learning_rate": 4.909681335714328e-05, - "loss": 1.1334, - "step": 6130 - }, - { - "epoch": 0.054279601831715554, - "grad_norm": 4.934144973754883, - "learning_rate": 4.909533996947141e-05, - "loss": 0.9185, - "step": 6140 - }, - { - "epoch": 0.05436800509202779, - "grad_norm": 4.428097248077393, - "learning_rate": 4.909386658179954e-05, - "loss": 0.9079, - "step": 6150 - }, - { - "epoch": 0.05445640835234004, - "grad_norm": 4.912667751312256, - "learning_rate": 4.9092393194127665e-05, - "loss": 1.002, - "step": 6160 - }, - { - "epoch": 0.054544811612652275, - "grad_norm": 7.442349910736084, - "learning_rate": 4.90909198064558e-05, - "loss": 1.0206, - "step": 6170 - }, - { - "epoch": 0.05463321487296451, - "grad_norm": 3.1982181072235107, - "learning_rate": 4.908944641878393e-05, - "loss": 1.0653, - "step": 6180 - }, - { - "epoch": 0.05472161813327676, - "grad_norm": 5.053394794464111, - "learning_rate": 4.9087973031112057e-05, - "loss": 1.021, - "step": 6190 - }, - { - "epoch": 0.054810021393588995, - "grad_norm": 9.006791114807129, - "learning_rate": 4.9086499643440185e-05, - "loss": 0.9457, - "step": 6200 - }, - { - "epoch": 0.05489842465390123, - "grad_norm": 7.753384113311768, - "learning_rate": 4.908502625576831e-05, - "loss": 0.9833, - "step": 6210 - }, - { - "epoch": 0.05498682791421348, - "grad_norm": 5.772186756134033, - "learning_rate": 4.908355286809644e-05, - "loss": 0.9516, - "step": 6220 - }, - { - "epoch": 0.055075231174525716, - "grad_norm": 4.40585470199585, - "learning_rate": 4.908207948042458e-05, - "loss": 0.9132, - "step": 6230 - }, - { - "epoch": 0.05516363443483795, - "grad_norm": 6.505217552185059, - "learning_rate": 4.90806060927527e-05, - "loss": 0.8467, - "step": 6240 - }, - { - "epoch": 0.0552520376951502, - "grad_norm": 4.562171459197998, - "learning_rate": 4.9079132705080833e-05, - "loss": 0.8822, - "step": 6250 - }, - { - "epoch": 0.055340440955462436, - "grad_norm": 7.463866710662842, - "learning_rate": 4.907765931740896e-05, - "loss": 1.0114, - "step": 6260 - }, - { - "epoch": 0.05542884421577468, - "grad_norm": 8.527029037475586, - "learning_rate": 4.907618592973709e-05, - "loss": 0.9943, - "step": 6270 - }, - { - "epoch": 0.05551724747608692, - "grad_norm": 5.194516658782959, - "learning_rate": 4.907471254206522e-05, - "loss": 0.9317, - "step": 6280 - }, - { - "epoch": 0.055605650736399156, - "grad_norm": 3.785703420639038, - "learning_rate": 4.9073239154393354e-05, - "loss": 0.8754, - "step": 6290 - }, - { - "epoch": 0.0556940539967114, - "grad_norm": 5.605743408203125, - "learning_rate": 4.9071765766721475e-05, - "loss": 1.0147, - "step": 6300 - }, - { - "epoch": 0.05578245725702364, - "grad_norm": 6.450657844543457, - "learning_rate": 4.907029237904961e-05, - "loss": 1.0177, - "step": 6310 - }, - { - "epoch": 0.05587086051733588, - "grad_norm": 3.6782920360565186, - "learning_rate": 4.906881899137773e-05, - "loss": 0.9223, - "step": 6320 - }, - { - "epoch": 0.05595926377764812, - "grad_norm": 10.444466590881348, - "learning_rate": 4.906734560370587e-05, - "loss": 0.9946, - "step": 6330 - }, - { - "epoch": 0.05604766703796036, - "grad_norm": 3.6840245723724365, - "learning_rate": 4.9065872216033995e-05, - "loss": 0.9282, - "step": 6340 - }, - { - "epoch": 0.0561360702982726, - "grad_norm": 6.504221439361572, - "learning_rate": 4.9064398828362124e-05, - "loss": 1.0093, - "step": 6350 - }, - { - "epoch": 0.05622447355858484, - "grad_norm": 5.361945629119873, - "learning_rate": 4.906292544069025e-05, - "loss": 0.945, - "step": 6360 - }, - { - "epoch": 0.05631287681889708, - "grad_norm": 3.5966508388519287, - "learning_rate": 4.906145205301839e-05, - "loss": 0.9992, - "step": 6370 - }, - { - "epoch": 0.056401280079209325, - "grad_norm": 5.785979747772217, - "learning_rate": 4.905997866534651e-05, - "loss": 0.9951, - "step": 6380 - }, - { - "epoch": 0.05648968333952156, - "grad_norm": 6.949944972991943, - "learning_rate": 4.9058505277674644e-05, - "loss": 0.8968, - "step": 6390 - }, - { - "epoch": 0.0565780865998338, - "grad_norm": 9.668983459472656, - "learning_rate": 4.905703189000277e-05, - "loss": 0.9029, - "step": 6400 - }, - { - "epoch": 0.056666489860146045, - "grad_norm": 5.306529521942139, - "learning_rate": 4.90555585023309e-05, - "loss": 0.8663, - "step": 6410 - }, - { - "epoch": 0.05675489312045828, - "grad_norm": 4.871407508850098, - "learning_rate": 4.905408511465903e-05, - "loss": 0.9431, - "step": 6420 - }, - { - "epoch": 0.05684329638077052, - "grad_norm": 4.606576442718506, - "learning_rate": 4.905261172698716e-05, - "loss": 1.0039, - "step": 6430 - }, - { - "epoch": 0.056931699641082766, - "grad_norm": 2.390242338180542, - "learning_rate": 4.9051138339315286e-05, - "loss": 0.9147, - "step": 6440 - }, - { - "epoch": 0.057020102901395, - "grad_norm": 4.913139343261719, - "learning_rate": 4.904966495164342e-05, - "loss": 1.0337, - "step": 6450 - }, - { - "epoch": 0.05710850616170724, - "grad_norm": 5.77178955078125, - "learning_rate": 4.904819156397155e-05, - "loss": 0.8926, - "step": 6460 - }, - { - "epoch": 0.057196909422019486, - "grad_norm": 3.674722194671631, - "learning_rate": 4.904671817629968e-05, - "loss": 0.9608, - "step": 6470 - }, - { - "epoch": 0.057285312682331724, - "grad_norm": 13.199363708496094, - "learning_rate": 4.9045244788627806e-05, - "loss": 1.2117, - "step": 6480 - }, - { - "epoch": 0.05737371594264396, - "grad_norm": 3.5203678607940674, - "learning_rate": 4.9043771400955934e-05, - "loss": 1.0523, - "step": 6490 - }, - { - "epoch": 0.057462119202956206, - "grad_norm": 5.5043110847473145, - "learning_rate": 4.904229801328406e-05, - "loss": 0.9041, - "step": 6500 - }, - { - "epoch": 0.057550522463268444, - "grad_norm": 3.238118886947632, - "learning_rate": 4.90408246256122e-05, - "loss": 0.9177, - "step": 6510 - }, - { - "epoch": 0.05763892572358069, - "grad_norm": 3.886664628982544, - "learning_rate": 4.9039351237940326e-05, - "loss": 1.0162, - "step": 6520 - }, - { - "epoch": 0.05772732898389293, - "grad_norm": 6.0458173751831055, - "learning_rate": 4.9037877850268455e-05, - "loss": 0.9505, - "step": 6530 - }, - { - "epoch": 0.057815732244205165, - "grad_norm": 3.132824182510376, - "learning_rate": 4.903640446259658e-05, - "loss": 0.8276, - "step": 6540 - }, - { - "epoch": 0.05790413550451741, - "grad_norm": 6.579557418823242, - "learning_rate": 4.903493107492471e-05, - "loss": 1.0451, - "step": 6550 - }, - { - "epoch": 0.05799253876482965, - "grad_norm": 5.665095329284668, - "learning_rate": 4.903345768725284e-05, - "loss": 1.0314, - "step": 6560 - }, - { - "epoch": 0.058080942025141885, - "grad_norm": 9.765207290649414, - "learning_rate": 4.903198429958097e-05, - "loss": 1.0061, - "step": 6570 - }, - { - "epoch": 0.05816934528545413, - "grad_norm": 6.479765892028809, - "learning_rate": 4.90305109119091e-05, - "loss": 0.9157, - "step": 6580 - }, - { - "epoch": 0.05825774854576637, - "grad_norm": 6.944589614868164, - "learning_rate": 4.902903752423723e-05, - "loss": 1.0337, - "step": 6590 - }, - { - "epoch": 0.058346151806078606, - "grad_norm": 5.715635776519775, - "learning_rate": 4.902756413656536e-05, - "loss": 0.9518, - "step": 6600 - }, - { - "epoch": 0.05843455506639085, - "grad_norm": 3.145275115966797, - "learning_rate": 4.902609074889349e-05, - "loss": 0.908, - "step": 6610 - }, - { - "epoch": 0.05852295832670309, - "grad_norm": 9.593637466430664, - "learning_rate": 4.9024617361221616e-05, - "loss": 0.9803, - "step": 6620 - }, - { - "epoch": 0.058611361587015326, - "grad_norm": 9.412375450134277, - "learning_rate": 4.9023143973549745e-05, - "loss": 1.0062, - "step": 6630 - }, - { - "epoch": 0.05869976484732757, - "grad_norm": 5.172863006591797, - "learning_rate": 4.902167058587788e-05, - "loss": 0.9477, - "step": 6640 - }, - { - "epoch": 0.05878816810763981, - "grad_norm": 5.577682971954346, - "learning_rate": 4.902019719820601e-05, - "loss": 1.0084, - "step": 6650 - }, - { - "epoch": 0.05887657136795205, - "grad_norm": 5.950476169586182, - "learning_rate": 4.901872381053414e-05, - "loss": 1.0198, - "step": 6660 - }, - { - "epoch": 0.05896497462826429, - "grad_norm": 6.371071815490723, - "learning_rate": 4.9017250422862265e-05, - "loss": 1.0459, - "step": 6670 - }, - { - "epoch": 0.05905337788857653, - "grad_norm": 3.299262523651123, - "learning_rate": 4.901577703519039e-05, - "loss": 0.975, - "step": 6680 - }, - { - "epoch": 0.059141781148888774, - "grad_norm": 3.839036226272583, - "learning_rate": 4.901430364751852e-05, - "loss": 0.9142, - "step": 6690 - }, - { - "epoch": 0.05923018440920101, - "grad_norm": 4.645992279052734, - "learning_rate": 4.901283025984666e-05, - "loss": 0.9328, - "step": 6700 - }, - { - "epoch": 0.05931858766951325, - "grad_norm": 7.860993385314941, - "learning_rate": 4.901135687217478e-05, - "loss": 1.0465, - "step": 6710 - }, - { - "epoch": 0.059406990929825494, - "grad_norm": 3.80489444732666, - "learning_rate": 4.9009883484502914e-05, - "loss": 0.9375, - "step": 6720 - }, - { - "epoch": 0.05949539419013773, - "grad_norm": 5.162045955657959, - "learning_rate": 4.900841009683104e-05, - "loss": 0.8921, - "step": 6730 - }, - { - "epoch": 0.05958379745044997, - "grad_norm": 2.4445645809173584, - "learning_rate": 4.900693670915917e-05, - "loss": 1.0937, - "step": 6740 - }, - { - "epoch": 0.059672200710762215, - "grad_norm": 3.2874808311462402, - "learning_rate": 4.90054633214873e-05, - "loss": 0.957, - "step": 6750 - }, - { - "epoch": 0.05976060397107445, - "grad_norm": 4.9104132652282715, - "learning_rate": 4.9003989933815434e-05, - "loss": 0.917, - "step": 6760 - }, - { - "epoch": 0.05984900723138669, - "grad_norm": 5.925168514251709, - "learning_rate": 4.9002516546143555e-05, - "loss": 0.9887, - "step": 6770 - }, - { - "epoch": 0.059937410491698935, - "grad_norm": 6.531099796295166, - "learning_rate": 4.900104315847169e-05, - "loss": 0.9383, - "step": 6780 - }, - { - "epoch": 0.06002581375201117, - "grad_norm": 6.779243469238281, - "learning_rate": 4.899956977079981e-05, - "loss": 1.0173, - "step": 6790 - }, - { - "epoch": 0.06011421701232342, - "grad_norm": 5.83915376663208, - "learning_rate": 4.899809638312795e-05, - "loss": 0.9161, - "step": 6800 - }, - { - "epoch": 0.060202620272635655, - "grad_norm": 4.769713401794434, - "learning_rate": 4.8996622995456076e-05, - "loss": 0.9705, - "step": 6810 - }, - { - "epoch": 0.06029102353294789, - "grad_norm": 4.014643669128418, - "learning_rate": 4.8995149607784204e-05, - "loss": 0.9136, - "step": 6820 - }, - { - "epoch": 0.06037942679326014, - "grad_norm": 9.945269584655762, - "learning_rate": 4.899367622011233e-05, - "loss": 0.9235, - "step": 6830 - }, - { - "epoch": 0.060467830053572376, - "grad_norm": 4.868852138519287, - "learning_rate": 4.899220283244047e-05, - "loss": 0.9456, - "step": 6840 - }, - { - "epoch": 0.060556233313884614, - "grad_norm": 5.041273593902588, - "learning_rate": 4.899072944476859e-05, - "loss": 0.9664, - "step": 6850 - }, - { - "epoch": 0.06064463657419686, - "grad_norm": 5.9616169929504395, - "learning_rate": 4.8989256057096724e-05, - "loss": 0.9784, - "step": 6860 - }, - { - "epoch": 0.060733039834509096, - "grad_norm": 5.6391072273254395, - "learning_rate": 4.898778266942485e-05, - "loss": 1.113, - "step": 6870 - }, - { - "epoch": 0.060821443094821334, - "grad_norm": 3.411214590072632, - "learning_rate": 4.898630928175298e-05, - "loss": 0.9794, - "step": 6880 - }, - { - "epoch": 0.06090984635513358, - "grad_norm": 7.82897424697876, - "learning_rate": 4.898483589408111e-05, - "loss": 0.8892, - "step": 6890 - }, - { - "epoch": 0.06099824961544582, - "grad_norm": 6.4274396896362305, - "learning_rate": 4.898336250640924e-05, - "loss": 0.9396, - "step": 6900 - }, - { - "epoch": 0.061086652875758055, - "grad_norm": 8.304560661315918, - "learning_rate": 4.8981889118737366e-05, - "loss": 1.0109, - "step": 6910 - }, - { - "epoch": 0.0611750561360703, - "grad_norm": 5.554924488067627, - "learning_rate": 4.89804157310655e-05, - "loss": 0.886, - "step": 6920 - }, - { - "epoch": 0.06126345939638254, - "grad_norm": 5.120604515075684, - "learning_rate": 4.897894234339362e-05, - "loss": 1.0324, - "step": 6930 - }, - { - "epoch": 0.06135186265669478, - "grad_norm": 3.963286876678467, - "learning_rate": 4.897746895572176e-05, - "loss": 0.9292, - "step": 6940 - }, - { - "epoch": 0.06144026591700702, - "grad_norm": 3.5170414447784424, - "learning_rate": 4.8975995568049886e-05, - "loss": 0.873, - "step": 6950 - }, - { - "epoch": 0.06152866917731926, - "grad_norm": 6.453105926513672, - "learning_rate": 4.8974522180378014e-05, - "loss": 0.9063, - "step": 6960 - }, - { - "epoch": 0.0616170724376315, - "grad_norm": 8.583768844604492, - "learning_rate": 4.897304879270614e-05, - "loss": 1.0464, - "step": 6970 - }, - { - "epoch": 0.06170547569794374, - "grad_norm": 4.491311550140381, - "learning_rate": 4.897157540503428e-05, - "loss": 0.9401, - "step": 6980 - }, - { - "epoch": 0.06179387895825598, - "grad_norm": 7.161159038543701, - "learning_rate": 4.89701020173624e-05, - "loss": 0.9634, - "step": 6990 - }, - { - "epoch": 0.06188228221856822, - "grad_norm": 6.075889587402344, - "learning_rate": 4.8968628629690535e-05, - "loss": 0.9824, - "step": 7000 - }, - { - "epoch": 0.06197068547888046, - "grad_norm": 6.824237823486328, - "learning_rate": 4.8967155242018656e-05, - "loss": 0.8896, - "step": 7010 - }, - { - "epoch": 0.0620590887391927, - "grad_norm": 4.351720333099365, - "learning_rate": 4.896568185434679e-05, - "loss": 0.8572, - "step": 7020 - }, - { - "epoch": 0.06214749199950494, - "grad_norm": 7.742468357086182, - "learning_rate": 4.896420846667492e-05, - "loss": 0.92, - "step": 7030 - }, - { - "epoch": 0.06223589525981718, - "grad_norm": 9.736001968383789, - "learning_rate": 4.896273507900305e-05, - "loss": 0.9778, - "step": 7040 - }, - { - "epoch": 0.062324298520129426, - "grad_norm": 7.4972920417785645, - "learning_rate": 4.8961261691331176e-05, - "loss": 0.9763, - "step": 7050 - }, - { - "epoch": 0.062412701780441664, - "grad_norm": 4.487346172332764, - "learning_rate": 4.895978830365931e-05, - "loss": 1.0487, - "step": 7060 - }, - { - "epoch": 0.0625011050407539, - "grad_norm": 3.527085304260254, - "learning_rate": 4.895831491598743e-05, - "loss": 0.9339, - "step": 7070 - }, - { - "epoch": 0.06258950830106615, - "grad_norm": 4.565359592437744, - "learning_rate": 4.895684152831557e-05, - "loss": 0.9336, - "step": 7080 - }, - { - "epoch": 0.06267791156137838, - "grad_norm": 7.8365936279296875, - "learning_rate": 4.8955368140643697e-05, - "loss": 0.8912, - "step": 7090 - }, - { - "epoch": 0.06276631482169062, - "grad_norm": 6.922832489013672, - "learning_rate": 4.8953894752971825e-05, - "loss": 0.9872, - "step": 7100 - }, - { - "epoch": 0.06285471808200287, - "grad_norm": 5.70310640335083, - "learning_rate": 4.895242136529995e-05, - "loss": 0.9987, - "step": 7110 - }, - { - "epoch": 0.06294312134231511, - "grad_norm": 4.13558292388916, - "learning_rate": 4.895094797762809e-05, - "loss": 0.9326, - "step": 7120 - }, - { - "epoch": 0.06303152460262734, - "grad_norm": 7.350679874420166, - "learning_rate": 4.894947458995621e-05, - "loss": 0.9005, - "step": 7130 - }, - { - "epoch": 0.06311992786293959, - "grad_norm": 4.997514247894287, - "learning_rate": 4.8948001202284345e-05, - "loss": 0.9472, - "step": 7140 - }, - { - "epoch": 0.06320833112325183, - "grad_norm": 8.707229614257812, - "learning_rate": 4.894652781461247e-05, - "loss": 0.8869, - "step": 7150 - }, - { - "epoch": 0.06329673438356406, - "grad_norm": 5.310278415679932, - "learning_rate": 4.89450544269406e-05, - "loss": 1.0221, - "step": 7160 - }, - { - "epoch": 0.06338513764387631, - "grad_norm": 7.762638568878174, - "learning_rate": 4.894358103926873e-05, - "loss": 1.0534, - "step": 7170 - }, - { - "epoch": 0.06347354090418855, - "grad_norm": 7.497394561767578, - "learning_rate": 4.894210765159686e-05, - "loss": 0.9206, - "step": 7180 - }, - { - "epoch": 0.06356194416450078, - "grad_norm": 3.281755208969116, - "learning_rate": 4.894063426392499e-05, - "loss": 0.8431, - "step": 7190 - }, - { - "epoch": 0.06365034742481303, - "grad_norm": 4.9097185134887695, - "learning_rate": 4.893916087625312e-05, - "loss": 1.0359, - "step": 7200 - }, - { - "epoch": 0.06373875068512527, - "grad_norm": 4.54490852355957, - "learning_rate": 4.8937687488581244e-05, - "loss": 0.9101, - "step": 7210 - }, - { - "epoch": 0.0638271539454375, - "grad_norm": 7.325924873352051, - "learning_rate": 4.893621410090938e-05, - "loss": 0.942, - "step": 7220 - }, - { - "epoch": 0.06391555720574975, - "grad_norm": 4.005319118499756, - "learning_rate": 4.893474071323751e-05, - "loss": 1.0018, - "step": 7230 - }, - { - "epoch": 0.064003960466062, - "grad_norm": 5.555293560028076, - "learning_rate": 4.8933267325565635e-05, - "loss": 0.9417, - "step": 7240 - }, - { - "epoch": 0.06409236372637422, - "grad_norm": 6.122361660003662, - "learning_rate": 4.8931793937893764e-05, - "loss": 0.9577, - "step": 7250 - }, - { - "epoch": 0.06418076698668647, - "grad_norm": 6.8876166343688965, - "learning_rate": 4.893032055022189e-05, - "loss": 0.9515, - "step": 7260 - }, - { - "epoch": 0.06426917024699871, - "grad_norm": 3.8940982818603516, - "learning_rate": 4.892884716255002e-05, - "loss": 0.942, - "step": 7270 - }, - { - "epoch": 0.06435757350731094, - "grad_norm": 3.3218398094177246, - "learning_rate": 4.8927373774878156e-05, - "loss": 1.0395, - "step": 7280 - }, - { - "epoch": 0.06444597676762319, - "grad_norm": 5.618769645690918, - "learning_rate": 4.892590038720628e-05, - "loss": 0.9807, - "step": 7290 - }, - { - "epoch": 0.06453438002793543, - "grad_norm": 3.878244400024414, - "learning_rate": 4.892442699953441e-05, - "loss": 0.9813, - "step": 7300 - }, - { - "epoch": 0.06462278328824766, - "grad_norm": 8.489561080932617, - "learning_rate": 4.892295361186254e-05, - "loss": 0.9528, - "step": 7310 - }, - { - "epoch": 0.06471118654855991, - "grad_norm": 7.555717468261719, - "learning_rate": 4.892148022419067e-05, - "loss": 0.8856, - "step": 7320 - }, - { - "epoch": 0.06479958980887215, - "grad_norm": 5.251434803009033, - "learning_rate": 4.89200068365188e-05, - "loss": 0.9103, - "step": 7330 - }, - { - "epoch": 0.06488799306918439, - "grad_norm": 10.57512378692627, - "learning_rate": 4.891853344884693e-05, - "loss": 0.9906, - "step": 7340 - }, - { - "epoch": 0.06497639632949663, - "grad_norm": 6.348610877990723, - "learning_rate": 4.8917060061175054e-05, - "loss": 0.7835, - "step": 7350 - }, - { - "epoch": 0.06506479958980887, - "grad_norm": 3.2515549659729004, - "learning_rate": 4.891558667350319e-05, - "loss": 0.8747, - "step": 7360 - }, - { - "epoch": 0.0651532028501211, - "grad_norm": 5.83371639251709, - "learning_rate": 4.891411328583132e-05, - "loss": 0.9333, - "step": 7370 - }, - { - "epoch": 0.06524160611043335, - "grad_norm": 12.981037139892578, - "learning_rate": 4.8912639898159446e-05, - "loss": 1.1068, - "step": 7380 - }, - { - "epoch": 0.0653300093707456, - "grad_norm": 5.9547271728515625, - "learning_rate": 4.8911166510487574e-05, - "loss": 1.0265, - "step": 7390 - }, - { - "epoch": 0.06541841263105784, - "grad_norm": 7.136798858642578, - "learning_rate": 4.89096931228157e-05, - "loss": 0.8714, - "step": 7400 - }, - { - "epoch": 0.06550681589137007, - "grad_norm": 7.322783946990967, - "learning_rate": 4.890821973514383e-05, - "loss": 0.9294, - "step": 7410 - }, - { - "epoch": 0.06559521915168232, - "grad_norm": 10.76966667175293, - "learning_rate": 4.8906746347471966e-05, - "loss": 0.9084, - "step": 7420 - }, - { - "epoch": 0.06568362241199456, - "grad_norm": 5.148689270019531, - "learning_rate": 4.8905272959800094e-05, - "loss": 1.021, - "step": 7430 - }, - { - "epoch": 0.06577202567230679, - "grad_norm": 3.7091686725616455, - "learning_rate": 4.890379957212822e-05, - "loss": 0.9651, - "step": 7440 - }, - { - "epoch": 0.06586042893261904, - "grad_norm": 3.9058432579040527, - "learning_rate": 4.890232618445635e-05, - "loss": 0.8596, - "step": 7450 - }, - { - "epoch": 0.06594883219293128, - "grad_norm": 5.251888275146484, - "learning_rate": 4.890085279678448e-05, - "loss": 0.9971, - "step": 7460 - }, - { - "epoch": 0.06603723545324351, - "grad_norm": 3.484361171722412, - "learning_rate": 4.889937940911261e-05, - "loss": 0.9627, - "step": 7470 - }, - { - "epoch": 0.06612563871355576, - "grad_norm": 4.799646854400635, - "learning_rate": 4.889790602144074e-05, - "loss": 1.0495, - "step": 7480 - }, - { - "epoch": 0.066214041973868, - "grad_norm": 3.453781843185425, - "learning_rate": 4.889643263376887e-05, - "loss": 1.0594, - "step": 7490 - }, - { - "epoch": 0.06630244523418023, - "grad_norm": 9.214920043945312, - "learning_rate": 4.8894959246097e-05, - "loss": 0.853, - "step": 7500 - }, - { - "epoch": 0.06639084849449248, - "grad_norm": 10.193352699279785, - "learning_rate": 4.889348585842513e-05, - "loss": 0.7956, - "step": 7510 - }, - { - "epoch": 0.06647925175480472, - "grad_norm": 5.335453510284424, - "learning_rate": 4.8892012470753256e-05, - "loss": 0.9247, - "step": 7520 - }, - { - "epoch": 0.06656765501511695, - "grad_norm": 3.7129592895507812, - "learning_rate": 4.8890539083081385e-05, - "loss": 1.0042, - "step": 7530 - }, - { - "epoch": 0.0666560582754292, - "grad_norm": 3.840801954269409, - "learning_rate": 4.888906569540951e-05, - "loss": 0.9194, - "step": 7540 - }, - { - "epoch": 0.06674446153574144, - "grad_norm": 3.651582717895508, - "learning_rate": 4.888759230773765e-05, - "loss": 0.9767, - "step": 7550 - }, - { - "epoch": 0.06683286479605367, - "grad_norm": 3.655207872390747, - "learning_rate": 4.8886118920065777e-05, - "loss": 0.9537, - "step": 7560 - }, - { - "epoch": 0.06692126805636592, - "grad_norm": 5.665995121002197, - "learning_rate": 4.8884645532393905e-05, - "loss": 0.9313, - "step": 7570 - }, - { - "epoch": 0.06700967131667816, - "grad_norm": 6.286605358123779, - "learning_rate": 4.888317214472203e-05, - "loss": 1.0161, - "step": 7580 - }, - { - "epoch": 0.0670980745769904, - "grad_norm": 8.228954315185547, - "learning_rate": 4.888169875705016e-05, - "loss": 1.0193, - "step": 7590 - }, - { - "epoch": 0.06718647783730264, - "grad_norm": 8.809334754943848, - "learning_rate": 4.888022536937829e-05, - "loss": 1.1218, - "step": 7600 - }, - { - "epoch": 0.06727488109761488, - "grad_norm": 8.462353706359863, - "learning_rate": 4.8878751981706425e-05, - "loss": 0.8927, - "step": 7610 - }, - { - "epoch": 0.06736328435792711, - "grad_norm": 7.429812431335449, - "learning_rate": 4.887727859403455e-05, - "loss": 0.9888, - "step": 7620 - }, - { - "epoch": 0.06745168761823936, - "grad_norm": 3.1937925815582275, - "learning_rate": 4.887580520636268e-05, - "loss": 0.9499, - "step": 7630 - }, - { - "epoch": 0.0675400908785516, - "grad_norm": 3.0585787296295166, - "learning_rate": 4.887433181869081e-05, - "loss": 0.9619, - "step": 7640 - }, - { - "epoch": 0.06762849413886383, - "grad_norm": 4.329367637634277, - "learning_rate": 4.887285843101894e-05, - "loss": 1.0216, - "step": 7650 - }, - { - "epoch": 0.06771689739917608, - "grad_norm": 12.496232986450195, - "learning_rate": 4.887138504334707e-05, - "loss": 1.0454, - "step": 7660 - }, - { - "epoch": 0.06780530065948832, - "grad_norm": 4.210485935211182, - "learning_rate": 4.88699116556752e-05, - "loss": 0.8951, - "step": 7670 - }, - { - "epoch": 0.06789370391980057, - "grad_norm": 3.4743120670318604, - "learning_rate": 4.8868438268003324e-05, - "loss": 0.8772, - "step": 7680 - }, - { - "epoch": 0.0679821071801128, - "grad_norm": 4.501906871795654, - "learning_rate": 4.886696488033146e-05, - "loss": 1.0451, - "step": 7690 - }, - { - "epoch": 0.06807051044042504, - "grad_norm": 3.1872076988220215, - "learning_rate": 4.886549149265959e-05, - "loss": 0.9059, - "step": 7700 - }, - { - "epoch": 0.06815891370073729, - "grad_norm": 5.285301685333252, - "learning_rate": 4.8864018104987715e-05, - "loss": 0.8134, - "step": 7710 - }, - { - "epoch": 0.06824731696104952, - "grad_norm": 5.507226943969727, - "learning_rate": 4.8862544717315844e-05, - "loss": 0.8476, - "step": 7720 - }, - { - "epoch": 0.06833572022136176, - "grad_norm": 7.218921661376953, - "learning_rate": 4.886107132964397e-05, - "loss": 1.0962, - "step": 7730 - }, - { - "epoch": 0.06842412348167401, - "grad_norm": 4.271151542663574, - "learning_rate": 4.88595979419721e-05, - "loss": 1.1046, - "step": 7740 - }, - { - "epoch": 0.06851252674198624, - "grad_norm": 4.690978050231934, - "learning_rate": 4.8858124554300236e-05, - "loss": 0.9518, - "step": 7750 - }, - { - "epoch": 0.06860093000229849, - "grad_norm": 3.4593546390533447, - "learning_rate": 4.885665116662836e-05, - "loss": 0.9038, - "step": 7760 - }, - { - "epoch": 0.06868933326261073, - "grad_norm": 3.535809278488159, - "learning_rate": 4.885517777895649e-05, - "loss": 0.8462, - "step": 7770 - }, - { - "epoch": 0.06877773652292296, - "grad_norm": 6.570487976074219, - "learning_rate": 4.885370439128462e-05, - "loss": 0.8509, - "step": 7780 - }, - { - "epoch": 0.0688661397832352, - "grad_norm": 4.717918395996094, - "learning_rate": 4.885223100361275e-05, - "loss": 0.9433, - "step": 7790 - }, - { - "epoch": 0.06895454304354745, - "grad_norm": 3.3045620918273926, - "learning_rate": 4.885075761594088e-05, - "loss": 1.032, - "step": 7800 - }, - { - "epoch": 0.06904294630385968, - "grad_norm": 3.694939136505127, - "learning_rate": 4.884928422826901e-05, - "loss": 0.8535, - "step": 7810 - }, - { - "epoch": 0.06913134956417193, - "grad_norm": 4.511366844177246, - "learning_rate": 4.8847810840597134e-05, - "loss": 0.8749, - "step": 7820 - }, - { - "epoch": 0.06921975282448417, - "grad_norm": 6.869948863983154, - "learning_rate": 4.884633745292527e-05, - "loss": 0.8407, - "step": 7830 - }, - { - "epoch": 0.0693081560847964, - "grad_norm": 5.6310272216796875, - "learning_rate": 4.884486406525339e-05, - "loss": 0.9186, - "step": 7840 - }, - { - "epoch": 0.06939655934510865, - "grad_norm": 2.3454859256744385, - "learning_rate": 4.8843390677581526e-05, - "loss": 0.9441, - "step": 7850 - }, - { - "epoch": 0.06948496260542089, - "grad_norm": 3.726121664047241, - "learning_rate": 4.8841917289909654e-05, - "loss": 0.9089, - "step": 7860 - }, - { - "epoch": 0.06957336586573312, - "grad_norm": 5.054235935211182, - "learning_rate": 4.884044390223778e-05, - "loss": 0.9131, - "step": 7870 - }, - { - "epoch": 0.06966176912604537, - "grad_norm": 5.308077812194824, - "learning_rate": 4.883897051456591e-05, - "loss": 0.9014, - "step": 7880 - }, - { - "epoch": 0.06975017238635761, - "grad_norm": 3.4830482006073, - "learning_rate": 4.8837497126894046e-05, - "loss": 0.9266, - "step": 7890 - }, - { - "epoch": 0.06983857564666984, - "grad_norm": 2.1684446334838867, - "learning_rate": 4.883602373922217e-05, - "loss": 0.8765, - "step": 7900 - }, - { - "epoch": 0.06992697890698209, - "grad_norm": 7.7253875732421875, - "learning_rate": 4.88345503515503e-05, - "loss": 0.8887, - "step": 7910 - }, - { - "epoch": 0.07001538216729433, - "grad_norm": 6.309045791625977, - "learning_rate": 4.883307696387843e-05, - "loss": 0.9195, - "step": 7920 - }, - { - "epoch": 0.07010378542760658, - "grad_norm": 7.049060344696045, - "learning_rate": 4.883160357620656e-05, - "loss": 0.95, - "step": 7930 - }, - { - "epoch": 0.07019218868791881, - "grad_norm": 5.963956832885742, - "learning_rate": 4.883013018853469e-05, - "loss": 0.9175, - "step": 7940 - }, - { - "epoch": 0.07028059194823105, - "grad_norm": 7.502468109130859, - "learning_rate": 4.882865680086282e-05, - "loss": 0.8839, - "step": 7950 - }, - { - "epoch": 0.0703689952085433, - "grad_norm": 11.2520751953125, - "learning_rate": 4.8827183413190945e-05, - "loss": 0.8784, - "step": 7960 - }, - { - "epoch": 0.07045739846885553, - "grad_norm": 6.546641826629639, - "learning_rate": 4.882571002551908e-05, - "loss": 1.1201, - "step": 7970 - }, - { - "epoch": 0.07054580172916777, - "grad_norm": 3.698744297027588, - "learning_rate": 4.88242366378472e-05, - "loss": 1.0061, - "step": 7980 - }, - { - "epoch": 0.07063420498948002, - "grad_norm": 5.63902473449707, - "learning_rate": 4.8822763250175336e-05, - "loss": 0.9939, - "step": 7990 - }, - { - "epoch": 0.07072260824979225, - "grad_norm": 5.437800884246826, - "learning_rate": 4.8821289862503465e-05, - "loss": 0.9739, - "step": 8000 - }, - { - "epoch": 0.0708110115101045, - "grad_norm": 11.084389686584473, - "learning_rate": 4.881981647483159e-05, - "loss": 0.9, - "step": 8010 - }, - { - "epoch": 0.07089941477041674, - "grad_norm": 8.50456428527832, - "learning_rate": 4.881834308715972e-05, - "loss": 0.9302, - "step": 8020 - }, - { - "epoch": 0.07098781803072897, - "grad_norm": 3.414212465286255, - "learning_rate": 4.881686969948786e-05, - "loss": 0.8557, - "step": 8030 - }, - { - "epoch": 0.07107622129104121, - "grad_norm": 5.6906890869140625, - "learning_rate": 4.881539631181598e-05, - "loss": 1.0758, - "step": 8040 - }, - { - "epoch": 0.07116462455135346, - "grad_norm": 3.445957899093628, - "learning_rate": 4.881392292414411e-05, - "loss": 0.7803, - "step": 8050 - }, - { - "epoch": 0.07125302781166569, - "grad_norm": 5.444856643676758, - "learning_rate": 4.881244953647224e-05, - "loss": 1.0992, - "step": 8060 - }, - { - "epoch": 0.07134143107197793, - "grad_norm": 3.4007680416107178, - "learning_rate": 4.881097614880037e-05, - "loss": 0.8427, - "step": 8070 - }, - { - "epoch": 0.07142983433229018, - "grad_norm": 5.089715957641602, - "learning_rate": 4.88095027611285e-05, - "loss": 0.8963, - "step": 8080 - }, - { - "epoch": 0.07151823759260241, - "grad_norm": 4.008412837982178, - "learning_rate": 4.880802937345663e-05, - "loss": 0.978, - "step": 8090 - }, - { - "epoch": 0.07160664085291465, - "grad_norm": 5.542277812957764, - "learning_rate": 4.8806555985784755e-05, - "loss": 1.0073, - "step": 8100 - }, - { - "epoch": 0.0716950441132269, - "grad_norm": 3.897390127182007, - "learning_rate": 4.880508259811289e-05, - "loss": 0.9959, - "step": 8110 - }, - { - "epoch": 0.07178344737353913, - "grad_norm": 4.909268856048584, - "learning_rate": 4.880360921044101e-05, - "loss": 1.0364, - "step": 8120 - }, - { - "epoch": 0.07187185063385138, - "grad_norm": 4.000115394592285, - "learning_rate": 4.880213582276915e-05, - "loss": 0.8925, - "step": 8130 - }, - { - "epoch": 0.07196025389416362, - "grad_norm": 2.9951322078704834, - "learning_rate": 4.8800662435097275e-05, - "loss": 0.8361, - "step": 8140 - }, - { - "epoch": 0.07204865715447585, - "grad_norm": 3.519526481628418, - "learning_rate": 4.8799189047425404e-05, - "loss": 0.8563, - "step": 8150 - }, - { - "epoch": 0.0721370604147881, - "grad_norm": 7.9687299728393555, - "learning_rate": 4.879771565975353e-05, - "loss": 0.9414, - "step": 8160 - }, - { - "epoch": 0.07222546367510034, - "grad_norm": 6.15947151184082, - "learning_rate": 4.879624227208167e-05, - "loss": 0.9849, - "step": 8170 - }, - { - "epoch": 0.07231386693541257, - "grad_norm": 3.434873342514038, - "learning_rate": 4.879476888440979e-05, - "loss": 0.9088, - "step": 8180 - }, - { - "epoch": 0.07240227019572482, - "grad_norm": 6.331581115722656, - "learning_rate": 4.8793295496737924e-05, - "loss": 0.966, - "step": 8190 - }, - { - "epoch": 0.07249067345603706, - "grad_norm": 2.9577791690826416, - "learning_rate": 4.8791822109066045e-05, - "loss": 0.9171, - "step": 8200 - }, - { - "epoch": 0.0725790767163493, - "grad_norm": 3.063281297683716, - "learning_rate": 4.879034872139418e-05, - "loss": 0.9952, - "step": 8210 - }, - { - "epoch": 0.07266747997666154, - "grad_norm": 6.271945953369141, - "learning_rate": 4.878887533372231e-05, - "loss": 0.8675, - "step": 8220 - }, - { - "epoch": 0.07275588323697378, - "grad_norm": 2.9355950355529785, - "learning_rate": 4.878740194605044e-05, - "loss": 0.8829, - "step": 8230 - }, - { - "epoch": 0.07284428649728603, - "grad_norm": 10.568734169006348, - "learning_rate": 4.8785928558378566e-05, - "loss": 0.9237, - "step": 8240 - }, - { - "epoch": 0.07293268975759826, - "grad_norm": 4.638792037963867, - "learning_rate": 4.87844551707067e-05, - "loss": 0.9894, - "step": 8250 - }, - { - "epoch": 0.0730210930179105, - "grad_norm": 12.097837448120117, - "learning_rate": 4.878298178303482e-05, - "loss": 0.9352, - "step": 8260 - }, - { - "epoch": 0.07310949627822275, - "grad_norm": 6.81761360168457, - "learning_rate": 4.878150839536296e-05, - "loss": 0.9865, - "step": 8270 - }, - { - "epoch": 0.07319789953853498, - "grad_norm": 4.1839823722839355, - "learning_rate": 4.8780035007691086e-05, - "loss": 0.7932, - "step": 8280 - }, - { - "epoch": 0.07328630279884722, - "grad_norm": 3.923272132873535, - "learning_rate": 4.8778561620019214e-05, - "loss": 0.9607, - "step": 8290 - }, - { - "epoch": 0.07337470605915947, - "grad_norm": 10.800895690917969, - "learning_rate": 4.877708823234734e-05, - "loss": 1.0836, - "step": 8300 - }, - { - "epoch": 0.0734631093194717, - "grad_norm": 7.747611045837402, - "learning_rate": 4.877561484467547e-05, - "loss": 1.0398, - "step": 8310 - }, - { - "epoch": 0.07355151257978394, - "grad_norm": 4.6890339851379395, - "learning_rate": 4.87741414570036e-05, - "loss": 0.8863, - "step": 8320 - }, - { - "epoch": 0.07363991584009619, - "grad_norm": 2.836679458618164, - "learning_rate": 4.8772668069331734e-05, - "loss": 0.982, - "step": 8330 - }, - { - "epoch": 0.07372831910040842, - "grad_norm": 7.454015254974365, - "learning_rate": 4.877119468165986e-05, - "loss": 1.0465, - "step": 8340 - }, - { - "epoch": 0.07381672236072066, - "grad_norm": 3.54472279548645, - "learning_rate": 4.876972129398799e-05, - "loss": 1.0009, - "step": 8350 - }, - { - "epoch": 0.07390512562103291, - "grad_norm": 4.462231159210205, - "learning_rate": 4.876824790631612e-05, - "loss": 0.8919, - "step": 8360 - }, - { - "epoch": 0.07399352888134514, - "grad_norm": 6.081721305847168, - "learning_rate": 4.876677451864425e-05, - "loss": 0.8636, - "step": 8370 - }, - { - "epoch": 0.07408193214165738, - "grad_norm": 9.968588829040527, - "learning_rate": 4.8765301130972376e-05, - "loss": 0.9628, - "step": 8380 - }, - { - "epoch": 0.07417033540196963, - "grad_norm": 3.739511489868164, - "learning_rate": 4.876382774330051e-05, - "loss": 0.951, - "step": 8390 - }, - { - "epoch": 0.07425873866228186, - "grad_norm": 4.119143962860107, - "learning_rate": 4.876235435562864e-05, - "loss": 0.8034, - "step": 8400 - }, - { - "epoch": 0.0743471419225941, - "grad_norm": 4.583399772644043, - "learning_rate": 4.876088096795677e-05, - "loss": 0.8868, - "step": 8410 - }, - { - "epoch": 0.07443554518290635, - "grad_norm": 6.812721252441406, - "learning_rate": 4.8759407580284896e-05, - "loss": 0.9057, - "step": 8420 - }, - { - "epoch": 0.07452394844321858, - "grad_norm": 5.281586647033691, - "learning_rate": 4.8757934192613025e-05, - "loss": 0.9477, - "step": 8430 - }, - { - "epoch": 0.07461235170353082, - "grad_norm": 7.319498538970947, - "learning_rate": 4.875646080494115e-05, - "loss": 0.9479, - "step": 8440 - }, - { - "epoch": 0.07470075496384307, - "grad_norm": 4.214430332183838, - "learning_rate": 4.875498741726928e-05, - "loss": 0.8986, - "step": 8450 - }, - { - "epoch": 0.07478915822415531, - "grad_norm": 6.45574951171875, - "learning_rate": 4.8753514029597417e-05, - "loss": 0.944, - "step": 8460 - }, - { - "epoch": 0.07487756148446754, - "grad_norm": 4.877260208129883, - "learning_rate": 4.8752040641925545e-05, - "loss": 0.912, - "step": 8470 - }, - { - "epoch": 0.07496596474477979, - "grad_norm": 14.313943862915039, - "learning_rate": 4.875056725425367e-05, - "loss": 0.9442, - "step": 8480 - }, - { - "epoch": 0.07505436800509203, - "grad_norm": 3.369229555130005, - "learning_rate": 4.87490938665818e-05, - "loss": 1.0468, - "step": 8490 - }, - { - "epoch": 0.07514277126540427, - "grad_norm": 5.822304725646973, - "learning_rate": 4.874762047890993e-05, - "loss": 0.9498, - "step": 8500 - }, - { - "epoch": 0.07523117452571651, - "grad_norm": 5.878641605377197, - "learning_rate": 4.874614709123806e-05, - "loss": 0.8911, - "step": 8510 - }, - { - "epoch": 0.07531957778602875, - "grad_norm": 6.837238788604736, - "learning_rate": 4.8744673703566193e-05, - "loss": 0.9657, - "step": 8520 - }, - { - "epoch": 0.07540798104634099, - "grad_norm": 7.226667881011963, - "learning_rate": 4.874320031589432e-05, - "loss": 1.0134, - "step": 8530 - }, - { - "epoch": 0.07549638430665323, - "grad_norm": 5.0386962890625, - "learning_rate": 4.874172692822245e-05, - "loss": 0.7914, - "step": 8540 - }, - { - "epoch": 0.07558478756696548, - "grad_norm": 3.4485931396484375, - "learning_rate": 4.874025354055058e-05, - "loss": 0.9248, - "step": 8550 - }, - { - "epoch": 0.0756731908272777, - "grad_norm": 11.87322998046875, - "learning_rate": 4.873878015287871e-05, - "loss": 1.0705, - "step": 8560 - }, - { - "epoch": 0.07576159408758995, - "grad_norm": 7.415741920471191, - "learning_rate": 4.8737306765206835e-05, - "loss": 1.0196, - "step": 8570 - }, - { - "epoch": 0.0758499973479022, - "grad_norm": 6.293057441711426, - "learning_rate": 4.873583337753497e-05, - "loss": 0.8976, - "step": 8580 - }, - { - "epoch": 0.07593840060821443, - "grad_norm": 10.351290702819824, - "learning_rate": 4.873435998986309e-05, - "loss": 0.8863, - "step": 8590 - }, - { - "epoch": 0.07602680386852667, - "grad_norm": 6.024335861206055, - "learning_rate": 4.873288660219123e-05, - "loss": 0.8745, - "step": 8600 - }, - { - "epoch": 0.07611520712883892, - "grad_norm": 3.7169315814971924, - "learning_rate": 4.8731413214519355e-05, - "loss": 0.8892, - "step": 8610 - }, - { - "epoch": 0.07620361038915115, - "grad_norm": 12.834798812866211, - "learning_rate": 4.8729939826847484e-05, - "loss": 0.8989, - "step": 8620 - }, - { - "epoch": 0.07629201364946339, - "grad_norm": 4.861924648284912, - "learning_rate": 4.872846643917561e-05, - "loss": 1.0471, - "step": 8630 - }, - { - "epoch": 0.07638041690977564, - "grad_norm": 9.233909606933594, - "learning_rate": 4.872699305150375e-05, - "loss": 0.9387, - "step": 8640 - }, - { - "epoch": 0.07646882017008787, - "grad_norm": 3.8283112049102783, - "learning_rate": 4.872551966383187e-05, - "loss": 0.9259, - "step": 8650 - }, - { - "epoch": 0.07655722343040011, - "grad_norm": 4.9739556312561035, - "learning_rate": 4.8724046276160004e-05, - "loss": 0.8678, - "step": 8660 - }, - { - "epoch": 0.07664562669071236, - "grad_norm": 3.300435781478882, - "learning_rate": 4.8722572888488126e-05, - "loss": 0.9091, - "step": 8670 - }, - { - "epoch": 0.07673402995102459, - "grad_norm": 3.170722723007202, - "learning_rate": 4.872109950081626e-05, - "loss": 0.8505, - "step": 8680 - }, - { - "epoch": 0.07682243321133683, - "grad_norm": 6.202303886413574, - "learning_rate": 4.871962611314439e-05, - "loss": 0.8057, - "step": 8690 - }, - { - "epoch": 0.07691083647164908, - "grad_norm": 4.593632221221924, - "learning_rate": 4.871815272547252e-05, - "loss": 0.8931, - "step": 8700 - }, - { - "epoch": 0.07699923973196131, - "grad_norm": 4.898752212524414, - "learning_rate": 4.8716679337800646e-05, - "loss": 0.9275, - "step": 8710 - }, - { - "epoch": 0.07708764299227355, - "grad_norm": 6.235629081726074, - "learning_rate": 4.871520595012878e-05, - "loss": 1.0612, - "step": 8720 - }, - { - "epoch": 0.0771760462525858, - "grad_norm": 3.890493631362915, - "learning_rate": 4.87137325624569e-05, - "loss": 0.8512, - "step": 8730 - }, - { - "epoch": 0.07726444951289804, - "grad_norm": 4.638036251068115, - "learning_rate": 4.871225917478504e-05, - "loss": 0.9186, - "step": 8740 - }, - { - "epoch": 0.07735285277321027, - "grad_norm": 5.432823657989502, - "learning_rate": 4.8710785787113166e-05, - "loss": 0.8126, - "step": 8750 - }, - { - "epoch": 0.07744125603352252, - "grad_norm": 8.495767593383789, - "learning_rate": 4.8709312399441294e-05, - "loss": 0.9044, - "step": 8760 - }, - { - "epoch": 0.07752965929383476, - "grad_norm": 6.364109039306641, - "learning_rate": 4.870783901176942e-05, - "loss": 0.8803, - "step": 8770 - }, - { - "epoch": 0.077618062554147, - "grad_norm": 5.039573669433594, - "learning_rate": 4.870636562409755e-05, - "loss": 0.8829, - "step": 8780 - }, - { - "epoch": 0.07770646581445924, - "grad_norm": 4.389164924621582, - "learning_rate": 4.870489223642568e-05, - "loss": 0.8927, - "step": 8790 - }, - { - "epoch": 0.07779486907477148, - "grad_norm": 3.298307180404663, - "learning_rate": 4.8703418848753814e-05, - "loss": 0.8852, - "step": 8800 - }, - { - "epoch": 0.07788327233508371, - "grad_norm": 4.961152076721191, - "learning_rate": 4.8701945461081936e-05, - "loss": 0.9537, - "step": 8810 - }, - { - "epoch": 0.07797167559539596, - "grad_norm": 3.1957497596740723, - "learning_rate": 4.870047207341007e-05, - "loss": 0.9297, - "step": 8820 - }, - { - "epoch": 0.0780600788557082, - "grad_norm": 11.411541938781738, - "learning_rate": 4.86989986857382e-05, - "loss": 0.9613, - "step": 8830 - }, - { - "epoch": 0.07814848211602043, - "grad_norm": 6.9205851554870605, - "learning_rate": 4.869752529806633e-05, - "loss": 0.8007, - "step": 8840 - }, - { - "epoch": 0.07823688537633268, - "grad_norm": 3.1366233825683594, - "learning_rate": 4.8696051910394456e-05, - "loss": 0.9913, - "step": 8850 - }, - { - "epoch": 0.07832528863664492, - "grad_norm": 5.935032844543457, - "learning_rate": 4.869457852272259e-05, - "loss": 0.9853, - "step": 8860 - }, - { - "epoch": 0.07841369189695716, - "grad_norm": 2.758721351623535, - "learning_rate": 4.869310513505071e-05, - "loss": 0.9228, - "step": 8870 - }, - { - "epoch": 0.0785020951572694, - "grad_norm": 15.793062210083008, - "learning_rate": 4.869163174737885e-05, - "loss": 0.9235, - "step": 8880 - }, - { - "epoch": 0.07859049841758164, - "grad_norm": 7.283065319061279, - "learning_rate": 4.8690158359706976e-05, - "loss": 0.9523, - "step": 8890 - }, - { - "epoch": 0.07867890167789388, - "grad_norm": 3.8146417140960693, - "learning_rate": 4.8688684972035105e-05, - "loss": 0.9303, - "step": 8900 - }, - { - "epoch": 0.07876730493820612, - "grad_norm": 8.225516319274902, - "learning_rate": 4.868721158436323e-05, - "loss": 0.804, - "step": 8910 - }, - { - "epoch": 0.07885570819851836, - "grad_norm": 6.507173538208008, - "learning_rate": 4.868573819669136e-05, - "loss": 0.7779, - "step": 8920 - }, - { - "epoch": 0.0789441114588306, - "grad_norm": 5.197059154510498, - "learning_rate": 4.868426480901949e-05, - "loss": 0.9114, - "step": 8930 - }, - { - "epoch": 0.07903251471914284, - "grad_norm": 3.2294628620147705, - "learning_rate": 4.8682791421347625e-05, - "loss": 0.7751, - "step": 8940 - }, - { - "epoch": 0.07912091797945509, - "grad_norm": 6.141626358032227, - "learning_rate": 4.8681318033675747e-05, - "loss": 0.959, - "step": 8950 - }, - { - "epoch": 0.07920932123976732, - "grad_norm": 9.837469100952148, - "learning_rate": 4.867984464600388e-05, - "loss": 0.8473, - "step": 8960 - }, - { - "epoch": 0.07929772450007956, - "grad_norm": 3.881667375564575, - "learning_rate": 4.867837125833201e-05, - "loss": 0.9265, - "step": 8970 - }, - { - "epoch": 0.0793861277603918, - "grad_norm": 3.616663932800293, - "learning_rate": 4.867689787066014e-05, - "loss": 0.9199, - "step": 8980 - }, - { - "epoch": 0.07947453102070404, - "grad_norm": 6.833520412445068, - "learning_rate": 4.867542448298827e-05, - "loss": 1.0582, - "step": 8990 - }, - { - "epoch": 0.07956293428101628, - "grad_norm": 6.162457466125488, - "learning_rate": 4.86739510953164e-05, - "loss": 0.8954, - "step": 9000 - }, - { - "epoch": 0.07965133754132853, - "grad_norm": 4.834630012512207, - "learning_rate": 4.8672477707644523e-05, - "loss": 0.897, - "step": 9010 - }, - { - "epoch": 0.07973974080164077, - "grad_norm": 6.014333248138428, - "learning_rate": 4.867100431997266e-05, - "loss": 0.8585, - "step": 9020 - }, - { - "epoch": 0.079828144061953, - "grad_norm": 4.596333980560303, - "learning_rate": 4.866953093230078e-05, - "loss": 1.0444, - "step": 9030 - }, - { - "epoch": 0.07991654732226525, - "grad_norm": 2.975196599960327, - "learning_rate": 4.8668057544628915e-05, - "loss": 0.9478, - "step": 9040 - }, - { - "epoch": 0.08000495058257749, - "grad_norm": 7.315517425537109, - "learning_rate": 4.8666584156957044e-05, - "loss": 0.9546, - "step": 9050 - }, - { - "epoch": 0.08009335384288972, - "grad_norm": 5.213054180145264, - "learning_rate": 4.866511076928517e-05, - "loss": 0.89, - "step": 9060 - }, - { - "epoch": 0.08018175710320197, - "grad_norm": 8.745564460754395, - "learning_rate": 4.86636373816133e-05, - "loss": 1.0033, - "step": 9070 - }, - { - "epoch": 0.08027016036351421, - "grad_norm": 12.515480995178223, - "learning_rate": 4.8662163993941435e-05, - "loss": 0.9331, - "step": 9080 - }, - { - "epoch": 0.08035856362382644, - "grad_norm": 8.361699104309082, - "learning_rate": 4.866069060626956e-05, - "loss": 0.9635, - "step": 9090 - }, - { - "epoch": 0.08044696688413869, - "grad_norm": 8.072646141052246, - "learning_rate": 4.865921721859769e-05, - "loss": 0.8679, - "step": 9100 - }, - { - "epoch": 0.08053537014445093, - "grad_norm": 2.1535627841949463, - "learning_rate": 4.865774383092582e-05, - "loss": 0.949, - "step": 9110 - }, - { - "epoch": 0.08062377340476316, - "grad_norm": 4.744264125823975, - "learning_rate": 4.865627044325395e-05, - "loss": 1.0331, - "step": 9120 - }, - { - "epoch": 0.08071217666507541, - "grad_norm": 8.3015775680542, - "learning_rate": 4.865479705558208e-05, - "loss": 0.9036, - "step": 9130 - }, - { - "epoch": 0.08080057992538765, - "grad_norm": 6.706944465637207, - "learning_rate": 4.8653323667910206e-05, - "loss": 0.9976, - "step": 9140 - }, - { - "epoch": 0.08088898318569988, - "grad_norm": 3.629948139190674, - "learning_rate": 4.8651850280238334e-05, - "loss": 1.0273, - "step": 9150 - }, - { - "epoch": 0.08097738644601213, - "grad_norm": 5.1356306076049805, - "learning_rate": 4.865037689256647e-05, - "loss": 0.8853, - "step": 9160 - }, - { - "epoch": 0.08106578970632437, - "grad_norm": 6.095510005950928, - "learning_rate": 4.864890350489459e-05, - "loss": 0.8794, - "step": 9170 - }, - { - "epoch": 0.0811541929666366, - "grad_norm": 5.80955171585083, - "learning_rate": 4.8647430117222726e-05, - "loss": 1.0623, - "step": 9180 - }, - { - "epoch": 0.08124259622694885, - "grad_norm": 5.339105129241943, - "learning_rate": 4.8645956729550854e-05, - "loss": 0.886, - "step": 9190 - }, - { - "epoch": 0.0813309994872611, - "grad_norm": 6.6511549949646, - "learning_rate": 4.864448334187898e-05, - "loss": 0.9781, - "step": 9200 - }, - { - "epoch": 0.08141940274757332, - "grad_norm": 7.387551784515381, - "learning_rate": 4.864300995420711e-05, - "loss": 0.9809, - "step": 9210 - }, - { - "epoch": 0.08150780600788557, - "grad_norm": 4.8425469398498535, - "learning_rate": 4.8641536566535246e-05, - "loss": 0.9702, - "step": 9220 - }, - { - "epoch": 0.08159620926819781, - "grad_norm": 9.483480453491211, - "learning_rate": 4.864006317886337e-05, - "loss": 0.8845, - "step": 9230 - }, - { - "epoch": 0.08168461252851004, - "grad_norm": 5.919254302978516, - "learning_rate": 4.86385897911915e-05, - "loss": 0.9047, - "step": 9240 - }, - { - "epoch": 0.08177301578882229, - "grad_norm": 5.504541397094727, - "learning_rate": 4.863711640351963e-05, - "loss": 0.9067, - "step": 9250 - }, - { - "epoch": 0.08186141904913453, - "grad_norm": 3.395012617111206, - "learning_rate": 4.863564301584776e-05, - "loss": 0.9285, - "step": 9260 - }, - { - "epoch": 0.08194982230944678, - "grad_norm": 5.435262203216553, - "learning_rate": 4.863416962817589e-05, - "loss": 0.9868, - "step": 9270 - }, - { - "epoch": 0.08203822556975901, - "grad_norm": 4.4511637687683105, - "learning_rate": 4.8632696240504016e-05, - "loss": 0.8484, - "step": 9280 - }, - { - "epoch": 0.08212662883007125, - "grad_norm": 5.902878761291504, - "learning_rate": 4.8631222852832144e-05, - "loss": 0.9654, - "step": 9290 - }, - { - "epoch": 0.0822150320903835, - "grad_norm": 5.942347526550293, - "learning_rate": 4.862974946516028e-05, - "loss": 0.8642, - "step": 9300 - }, - { - "epoch": 0.08230343535069573, - "grad_norm": 4.489561080932617, - "learning_rate": 4.862827607748841e-05, - "loss": 0.906, - "step": 9310 - }, - { - "epoch": 0.08239183861100798, - "grad_norm": 4.833526611328125, - "learning_rate": 4.8626802689816536e-05, - "loss": 0.9049, - "step": 9320 - }, - { - "epoch": 0.08248024187132022, - "grad_norm": 5.06949520111084, - "learning_rate": 4.8625329302144665e-05, - "loss": 0.8651, - "step": 9330 - }, - { - "epoch": 0.08256864513163245, - "grad_norm": 3.7296364307403564, - "learning_rate": 4.862385591447279e-05, - "loss": 0.9039, - "step": 9340 - }, - { - "epoch": 0.0826570483919447, - "grad_norm": 4.1431145668029785, - "learning_rate": 4.862238252680093e-05, - "loss": 0.8389, - "step": 9350 - }, - { - "epoch": 0.08274545165225694, - "grad_norm": 5.955991268157959, - "learning_rate": 4.8620909139129056e-05, - "loss": 1.0676, - "step": 9360 - }, - { - "epoch": 0.08283385491256917, - "grad_norm": 8.427042961120605, - "learning_rate": 4.8619435751457185e-05, - "loss": 0.9391, - "step": 9370 - }, - { - "epoch": 0.08292225817288142, - "grad_norm": 10.462730407714844, - "learning_rate": 4.861796236378531e-05, - "loss": 1.0418, - "step": 9380 - }, - { - "epoch": 0.08301066143319366, - "grad_norm": 6.5250725746154785, - "learning_rate": 4.861648897611344e-05, - "loss": 0.977, - "step": 9390 - }, - { - "epoch": 0.08309906469350589, - "grad_norm": 7.1132283210754395, - "learning_rate": 4.861501558844157e-05, - "loss": 0.9021, - "step": 9400 - }, - { - "epoch": 0.08318746795381814, - "grad_norm": 3.116255044937134, - "learning_rate": 4.8613542200769705e-05, - "loss": 0.9143, - "step": 9410 - }, - { - "epoch": 0.08327587121413038, - "grad_norm": 3.6663601398468018, - "learning_rate": 4.861206881309783e-05, - "loss": 1.0068, - "step": 9420 - }, - { - "epoch": 0.08336427447444261, - "grad_norm": 9.257044792175293, - "learning_rate": 4.861059542542596e-05, - "loss": 0.8149, - "step": 9430 - }, - { - "epoch": 0.08345267773475486, - "grad_norm": 8.620640754699707, - "learning_rate": 4.860912203775409e-05, - "loss": 0.8673, - "step": 9440 - }, - { - "epoch": 0.0835410809950671, - "grad_norm": 5.518143653869629, - "learning_rate": 4.860764865008222e-05, - "loss": 0.867, - "step": 9450 - }, - { - "epoch": 0.08362948425537933, - "grad_norm": 5.901253700256348, - "learning_rate": 4.860617526241035e-05, - "loss": 0.8197, - "step": 9460 - }, - { - "epoch": 0.08371788751569158, - "grad_norm": 5.221851825714111, - "learning_rate": 4.860470187473848e-05, - "loss": 0.8809, - "step": 9470 - }, - { - "epoch": 0.08380629077600382, - "grad_norm": 12.859980583190918, - "learning_rate": 4.8603228487066604e-05, - "loss": 1.0427, - "step": 9480 - }, - { - "epoch": 0.08389469403631605, - "grad_norm": 7.886823654174805, - "learning_rate": 4.860175509939474e-05, - "loss": 0.9257, - "step": 9490 - }, - { - "epoch": 0.0839830972966283, - "grad_norm": 12.58973503112793, - "learning_rate": 4.860028171172286e-05, - "loss": 0.8344, - "step": 9500 - }, - { - "epoch": 0.08407150055694054, - "grad_norm": 3.997868537902832, - "learning_rate": 4.8598808324050995e-05, - "loss": 0.8301, - "step": 9510 - }, - { - "epoch": 0.08415990381725277, - "grad_norm": 10.132091522216797, - "learning_rate": 4.8597334936379124e-05, - "loss": 0.9948, - "step": 9520 - }, - { - "epoch": 0.08424830707756502, - "grad_norm": 10.275101661682129, - "learning_rate": 4.859586154870725e-05, - "loss": 0.9757, - "step": 9530 - }, - { - "epoch": 0.08433671033787726, - "grad_norm": 7.305060386657715, - "learning_rate": 4.859438816103538e-05, - "loss": 0.8798, - "step": 9540 - }, - { - "epoch": 0.08442511359818951, - "grad_norm": 4.845224857330322, - "learning_rate": 4.8592914773363516e-05, - "loss": 0.8895, - "step": 9550 - }, - { - "epoch": 0.08451351685850174, - "grad_norm": 7.1752519607543945, - "learning_rate": 4.859144138569164e-05, - "loss": 0.9172, - "step": 9560 - }, - { - "epoch": 0.08460192011881398, - "grad_norm": 7.0624918937683105, - "learning_rate": 4.858996799801977e-05, - "loss": 0.9387, - "step": 9570 - }, - { - "epoch": 0.08469032337912623, - "grad_norm": 5.013216495513916, - "learning_rate": 4.85884946103479e-05, - "loss": 0.8568, - "step": 9580 - }, - { - "epoch": 0.08477872663943846, - "grad_norm": 7.294334411621094, - "learning_rate": 4.858702122267603e-05, - "loss": 0.8542, - "step": 9590 - }, - { - "epoch": 0.0848671298997507, - "grad_norm": 4.1750054359436035, - "learning_rate": 4.858554783500416e-05, - "loss": 0.9288, - "step": 9600 - }, - { - "epoch": 0.08495553316006295, - "grad_norm": 4.327548027038574, - "learning_rate": 4.8584074447332286e-05, - "loss": 0.8887, - "step": 9610 - }, - { - "epoch": 0.08504393642037518, - "grad_norm": 8.034274101257324, - "learning_rate": 4.8582601059660414e-05, - "loss": 0.9727, - "step": 9620 - }, - { - "epoch": 0.08513233968068742, - "grad_norm": 5.522838592529297, - "learning_rate": 4.858112767198855e-05, - "loss": 0.8279, - "step": 9630 - }, - { - "epoch": 0.08522074294099967, - "grad_norm": 3.7303531169891357, - "learning_rate": 4.857965428431667e-05, - "loss": 0.9085, - "step": 9640 - }, - { - "epoch": 0.0853091462013119, - "grad_norm": 5.9661054611206055, - "learning_rate": 4.8578180896644806e-05, - "loss": 0.7948, - "step": 9650 - }, - { - "epoch": 0.08539754946162414, - "grad_norm": 3.590968608856201, - "learning_rate": 4.8576707508972934e-05, - "loss": 0.8799, - "step": 9660 - }, - { - "epoch": 0.08548595272193639, - "grad_norm": 5.45489501953125, - "learning_rate": 4.857523412130106e-05, - "loss": 0.8394, - "step": 9670 - }, - { - "epoch": 0.08557435598224862, - "grad_norm": 5.977509498596191, - "learning_rate": 4.857376073362919e-05, - "loss": 0.9623, - "step": 9680 - }, - { - "epoch": 0.08566275924256087, - "grad_norm": 7.4215803146362305, - "learning_rate": 4.8572287345957326e-05, - "loss": 0.9526, - "step": 9690 - }, - { - "epoch": 0.08575116250287311, - "grad_norm": 5.85550594329834, - "learning_rate": 4.857081395828545e-05, - "loss": 0.8054, - "step": 9700 - }, - { - "epoch": 0.08583956576318534, - "grad_norm": 6.150406360626221, - "learning_rate": 4.856934057061358e-05, - "loss": 0.9118, - "step": 9710 - }, - { - "epoch": 0.08592796902349759, - "grad_norm": 6.535300254821777, - "learning_rate": 4.8567867182941704e-05, - "loss": 0.8618, - "step": 9720 - }, - { - "epoch": 0.08601637228380983, - "grad_norm": 6.574619293212891, - "learning_rate": 4.856639379526984e-05, - "loss": 0.9562, - "step": 9730 - }, - { - "epoch": 0.08610477554412206, - "grad_norm": 5.1996259689331055, - "learning_rate": 4.856492040759797e-05, - "loss": 0.8255, - "step": 9740 - }, - { - "epoch": 0.0861931788044343, - "grad_norm": 5.212782382965088, - "learning_rate": 4.8563447019926096e-05, - "loss": 0.9455, - "step": 9750 - }, - { - "epoch": 0.08628158206474655, - "grad_norm": 5.106650352478027, - "learning_rate": 4.8561973632254225e-05, - "loss": 0.9627, - "step": 9760 - }, - { - "epoch": 0.08636998532505878, - "grad_norm": 3.951993942260742, - "learning_rate": 4.856050024458236e-05, - "loss": 0.8424, - "step": 9770 - }, - { - "epoch": 0.08645838858537103, - "grad_norm": 3.8278937339782715, - "learning_rate": 4.855902685691048e-05, - "loss": 0.8581, - "step": 9780 - }, - { - "epoch": 0.08654679184568327, - "grad_norm": 7.4229736328125, - "learning_rate": 4.8557553469238616e-05, - "loss": 0.8744, - "step": 9790 - }, - { - "epoch": 0.0866351951059955, - "grad_norm": 2.8029661178588867, - "learning_rate": 4.8556080081566745e-05, - "loss": 0.7749, - "step": 9800 - }, - { - "epoch": 0.08672359836630775, - "grad_norm": 7.567657947540283, - "learning_rate": 4.855460669389487e-05, - "loss": 0.7839, - "step": 9810 - }, - { - "epoch": 0.08681200162661999, - "grad_norm": 5.7860026359558105, - "learning_rate": 4.8553133306223e-05, - "loss": 0.918, - "step": 9820 - }, - { - "epoch": 0.08690040488693224, - "grad_norm": 6.564748764038086, - "learning_rate": 4.8551659918551137e-05, - "loss": 0.787, - "step": 9830 - }, - { - "epoch": 0.08698880814724447, - "grad_norm": 8.89094352722168, - "learning_rate": 4.855018653087926e-05, - "loss": 0.7996, - "step": 9840 - }, - { - "epoch": 0.08707721140755671, - "grad_norm": 4.378356456756592, - "learning_rate": 4.854871314320739e-05, - "loss": 0.7955, - "step": 9850 - }, - { - "epoch": 0.08716561466786896, - "grad_norm": 8.747638702392578, - "learning_rate": 4.8547239755535515e-05, - "loss": 0.8285, - "step": 9860 - }, - { - "epoch": 0.08725401792818119, - "grad_norm": 4.274459362030029, - "learning_rate": 4.854576636786365e-05, - "loss": 1.0261, - "step": 9870 - }, - { - "epoch": 0.08734242118849343, - "grad_norm": 4.295324802398682, - "learning_rate": 4.854429298019178e-05, - "loss": 0.9296, - "step": 9880 - }, - { - "epoch": 0.08743082444880568, - "grad_norm": 6.3039960861206055, - "learning_rate": 4.854281959251991e-05, - "loss": 0.8966, - "step": 9890 - }, - { - "epoch": 0.08751922770911791, - "grad_norm": 8.68468189239502, - "learning_rate": 4.8541346204848035e-05, - "loss": 0.8689, - "step": 9900 - }, - { - "epoch": 0.08760763096943015, - "grad_norm": 4.591320991516113, - "learning_rate": 4.853987281717617e-05, - "loss": 0.8757, - "step": 9910 - }, - { - "epoch": 0.0876960342297424, - "grad_norm": 3.1836514472961426, - "learning_rate": 4.853839942950429e-05, - "loss": 0.829, - "step": 9920 - }, - { - "epoch": 0.08778443749005463, - "grad_norm": 6.4215850830078125, - "learning_rate": 4.853692604183243e-05, - "loss": 0.8945, - "step": 9930 - }, - { - "epoch": 0.08787284075036687, - "grad_norm": 4.03424072265625, - "learning_rate": 4.8535452654160555e-05, - "loss": 0.8697, - "step": 9940 - }, - { - "epoch": 0.08796124401067912, - "grad_norm": 6.259154796600342, - "learning_rate": 4.8533979266488684e-05, - "loss": 0.8026, - "step": 9950 - }, - { - "epoch": 0.08804964727099135, - "grad_norm": 6.984891891479492, - "learning_rate": 4.853250587881681e-05, - "loss": 0.9533, - "step": 9960 - }, - { - "epoch": 0.0881380505313036, - "grad_norm": 5.218731880187988, - "learning_rate": 4.853103249114494e-05, - "loss": 0.9003, - "step": 9970 - }, - { - "epoch": 0.08822645379161584, - "grad_norm": 9.232502937316895, - "learning_rate": 4.852955910347307e-05, - "loss": 0.9712, - "step": 9980 - }, - { - "epoch": 0.08831485705192807, - "grad_norm": 5.730737686157227, - "learning_rate": 4.8528085715801204e-05, - "loss": 0.9322, - "step": 9990 - }, - { - "epoch": 0.08840326031224031, - "grad_norm": 6.135512351989746, - "learning_rate": 4.8526612328129325e-05, - "loss": 0.8462, - "step": 10000 - }, - { - "epoch": 0.08849166357255256, - "grad_norm": 5.257114887237549, - "learning_rate": 4.852513894045746e-05, - "loss": 0.797, - "step": 10010 - }, - { - "epoch": 0.08858006683286479, - "grad_norm": 4.087240219116211, - "learning_rate": 4.852366555278559e-05, - "loss": 0.8649, - "step": 10020 - }, - { - "epoch": 0.08866847009317703, - "grad_norm": 7.3108367919921875, - "learning_rate": 4.852219216511372e-05, - "loss": 0.8297, - "step": 10030 - }, - { - "epoch": 0.08875687335348928, - "grad_norm": 9.954843521118164, - "learning_rate": 4.8520718777441846e-05, - "loss": 0.8958, - "step": 10040 - }, - { - "epoch": 0.08884527661380151, - "grad_norm": 5.5591864585876465, - "learning_rate": 4.851924538976998e-05, - "loss": 0.7817, - "step": 10050 - }, - { - "epoch": 0.08893367987411376, - "grad_norm": 3.648822546005249, - "learning_rate": 4.85177720020981e-05, - "loss": 0.8745, - "step": 10060 - }, - { - "epoch": 0.089022083134426, - "grad_norm": 7.206264495849609, - "learning_rate": 4.851629861442624e-05, - "loss": 0.8934, - "step": 10070 - }, - { - "epoch": 0.08911048639473824, - "grad_norm": 12.365501403808594, - "learning_rate": 4.8514825226754366e-05, - "loss": 0.8247, - "step": 10080 - }, - { - "epoch": 0.08919888965505048, - "grad_norm": 10.052908897399902, - "learning_rate": 4.8513351839082494e-05, - "loss": 0.8579, - "step": 10090 - }, - { - "epoch": 0.08928729291536272, - "grad_norm": 5.000236988067627, - "learning_rate": 4.851187845141062e-05, - "loss": 0.891, - "step": 10100 - }, - { - "epoch": 0.08937569617567497, - "grad_norm": 5.344237804412842, - "learning_rate": 4.851040506373875e-05, - "loss": 1.045, - "step": 10110 - }, - { - "epoch": 0.0894640994359872, - "grad_norm": 7.5027666091918945, - "learning_rate": 4.850893167606688e-05, - "loss": 0.8591, - "step": 10120 - }, - { - "epoch": 0.08955250269629944, - "grad_norm": 5.193624496459961, - "learning_rate": 4.8507458288395014e-05, - "loss": 0.9054, - "step": 10130 - }, - { - "epoch": 0.08964090595661169, - "grad_norm": 4.23881196975708, - "learning_rate": 4.850598490072314e-05, - "loss": 1.033, - "step": 10140 - }, - { - "epoch": 0.08972930921692392, - "grad_norm": 4.646491527557373, - "learning_rate": 4.850451151305127e-05, - "loss": 0.991, - "step": 10150 - }, - { - "epoch": 0.08981771247723616, - "grad_norm": 3.2145233154296875, - "learning_rate": 4.85030381253794e-05, - "loss": 0.9203, - "step": 10160 - }, - { - "epoch": 0.0899061157375484, - "grad_norm": 3.4959211349487305, - "learning_rate": 4.850156473770753e-05, - "loss": 0.9051, - "step": 10170 - }, - { - "epoch": 0.08999451899786064, - "grad_norm": 3.9835028648376465, - "learning_rate": 4.8500091350035656e-05, - "loss": 0.8963, - "step": 10180 - }, - { - "epoch": 0.09008292225817288, - "grad_norm": 5.471762180328369, - "learning_rate": 4.849861796236379e-05, - "loss": 0.9159, - "step": 10190 - }, - { - "epoch": 0.09017132551848513, - "grad_norm": 8.925811767578125, - "learning_rate": 4.849714457469192e-05, - "loss": 0.9832, - "step": 10200 - }, - { - "epoch": 0.09025972877879736, - "grad_norm": 4.184961795806885, - "learning_rate": 4.849567118702005e-05, - "loss": 0.9145, - "step": 10210 - }, - { - "epoch": 0.0903481320391096, - "grad_norm": 5.561929225921631, - "learning_rate": 4.8494197799348176e-05, - "loss": 0.8295, - "step": 10220 - }, - { - "epoch": 0.09043653529942185, - "grad_norm": 4.6912665367126465, - "learning_rate": 4.8492724411676305e-05, - "loss": 0.7778, - "step": 10230 - }, - { - "epoch": 0.09052493855973408, - "grad_norm": 4.672646999359131, - "learning_rate": 4.849125102400443e-05, - "loss": 0.9648, - "step": 10240 - }, - { - "epoch": 0.09061334182004632, - "grad_norm": 3.2846500873565674, - "learning_rate": 4.848977763633256e-05, - "loss": 0.9489, - "step": 10250 - }, - { - "epoch": 0.09070174508035857, - "grad_norm": 5.393837928771973, - "learning_rate": 4.8488304248660696e-05, - "loss": 0.8983, - "step": 10260 - }, - { - "epoch": 0.0907901483406708, - "grad_norm": 2.277843475341797, - "learning_rate": 4.8486830860988825e-05, - "loss": 0.8851, - "step": 10270 - }, - { - "epoch": 0.09087855160098304, - "grad_norm": 2.764491081237793, - "learning_rate": 4.848535747331695e-05, - "loss": 0.9351, - "step": 10280 - }, - { - "epoch": 0.09096695486129529, - "grad_norm": 10.896225929260254, - "learning_rate": 4.848388408564508e-05, - "loss": 0.9205, - "step": 10290 - }, - { - "epoch": 0.09105535812160752, - "grad_norm": 4.834585666656494, - "learning_rate": 4.848241069797321e-05, - "loss": 1.0148, - "step": 10300 - }, - { - "epoch": 0.09114376138191976, - "grad_norm": 4.664348125457764, - "learning_rate": 4.848093731030134e-05, - "loss": 0.8952, - "step": 10310 - }, - { - "epoch": 0.09123216464223201, - "grad_norm": 2.5782110691070557, - "learning_rate": 4.847946392262947e-05, - "loss": 0.8659, - "step": 10320 - }, - { - "epoch": 0.09132056790254424, - "grad_norm": 6.341892719268799, - "learning_rate": 4.8477990534957595e-05, - "loss": 0.955, - "step": 10330 - }, - { - "epoch": 0.09140897116285648, - "grad_norm": 4.601062297821045, - "learning_rate": 4.847651714728573e-05, - "loss": 0.8627, - "step": 10340 - }, - { - "epoch": 0.09149737442316873, - "grad_norm": 3.9025869369506836, - "learning_rate": 4.847504375961386e-05, - "loss": 0.8759, - "step": 10350 - }, - { - "epoch": 0.09158577768348097, - "grad_norm": 5.3651509284973145, - "learning_rate": 4.847357037194199e-05, - "loss": 0.8328, - "step": 10360 - }, - { - "epoch": 0.0916741809437932, - "grad_norm": 3.72887921333313, - "learning_rate": 4.8472096984270115e-05, - "loss": 0.8937, - "step": 10370 - }, - { - "epoch": 0.09176258420410545, - "grad_norm": 2.6255335807800293, - "learning_rate": 4.847062359659825e-05, - "loss": 0.9507, - "step": 10380 - }, - { - "epoch": 0.0918509874644177, - "grad_norm": 8.633094787597656, - "learning_rate": 4.846915020892637e-05, - "loss": 0.9912, - "step": 10390 - }, - { - "epoch": 0.09193939072472992, - "grad_norm": 4.747097015380859, - "learning_rate": 4.846767682125451e-05, - "loss": 0.9495, - "step": 10400 - }, - { - "epoch": 0.09202779398504217, - "grad_norm": 3.7421910762786865, - "learning_rate": 4.8466203433582635e-05, - "loss": 0.8498, - "step": 10410 - }, - { - "epoch": 0.09211619724535441, - "grad_norm": 7.091905117034912, - "learning_rate": 4.8464730045910764e-05, - "loss": 0.8067, - "step": 10420 - }, - { - "epoch": 0.09220460050566665, - "grad_norm": 6.685669898986816, - "learning_rate": 4.846325665823889e-05, - "loss": 0.8737, - "step": 10430 - }, - { - "epoch": 0.09229300376597889, - "grad_norm": 4.483755588531494, - "learning_rate": 4.846178327056702e-05, - "loss": 0.9254, - "step": 10440 - }, - { - "epoch": 0.09238140702629113, - "grad_norm": 6.154871940612793, - "learning_rate": 4.846030988289515e-05, - "loss": 0.9541, - "step": 10450 - }, - { - "epoch": 0.09246981028660337, - "grad_norm": 5.813018321990967, - "learning_rate": 4.8458836495223284e-05, - "loss": 0.8701, - "step": 10460 - }, - { - "epoch": 0.09255821354691561, - "grad_norm": 3.061584711074829, - "learning_rate": 4.8457363107551405e-05, - "loss": 0.9496, - "step": 10470 - }, - { - "epoch": 0.09264661680722786, - "grad_norm": 6.514309883117676, - "learning_rate": 4.845588971987954e-05, - "loss": 0.8014, - "step": 10480 - }, - { - "epoch": 0.09273502006754009, - "grad_norm": 2.5385093688964844, - "learning_rate": 4.845441633220767e-05, - "loss": 0.9017, - "step": 10490 - }, - { - "epoch": 0.09282342332785233, - "grad_norm": 5.3517680168151855, - "learning_rate": 4.84529429445358e-05, - "loss": 0.8886, - "step": 10500 - }, - { - "epoch": 0.09291182658816458, - "grad_norm": 6.019667625427246, - "learning_rate": 4.8451469556863926e-05, - "loss": 0.8635, - "step": 10510 - }, - { - "epoch": 0.0930002298484768, - "grad_norm": 6.268322944641113, - "learning_rate": 4.844999616919206e-05, - "loss": 0.7925, - "step": 10520 - }, - { - "epoch": 0.09308863310878905, - "grad_norm": 4.191029071807861, - "learning_rate": 4.844852278152018e-05, - "loss": 0.8386, - "step": 10530 - }, - { - "epoch": 0.0931770363691013, - "grad_norm": 5.0751519203186035, - "learning_rate": 4.844704939384832e-05, - "loss": 0.9943, - "step": 10540 - }, - { - "epoch": 0.09326543962941353, - "grad_norm": 4.535125732421875, - "learning_rate": 4.844557600617644e-05, - "loss": 0.8693, - "step": 10550 - }, - { - "epoch": 0.09335384288972577, - "grad_norm": 6.545707702636719, - "learning_rate": 4.8444102618504574e-05, - "loss": 0.9216, - "step": 10560 - }, - { - "epoch": 0.09344224615003802, - "grad_norm": 7.115601062774658, - "learning_rate": 4.84426292308327e-05, - "loss": 0.9629, - "step": 10570 - }, - { - "epoch": 0.09353064941035025, - "grad_norm": 4.578786373138428, - "learning_rate": 4.844115584316083e-05, - "loss": 0.9829, - "step": 10580 - }, - { - "epoch": 0.09361905267066249, - "grad_norm": 3.8402419090270996, - "learning_rate": 4.843968245548896e-05, - "loss": 0.9583, - "step": 10590 - }, - { - "epoch": 0.09370745593097474, - "grad_norm": 3.4268300533294678, - "learning_rate": 4.8438209067817094e-05, - "loss": 0.9275, - "step": 10600 - }, - { - "epoch": 0.09379585919128698, - "grad_norm": 4.041760444641113, - "learning_rate": 4.8436735680145216e-05, - "loss": 0.8986, - "step": 10610 - }, - { - "epoch": 0.09388426245159921, - "grad_norm": 4.180763244628906, - "learning_rate": 4.843526229247335e-05, - "loss": 0.9127, - "step": 10620 - }, - { - "epoch": 0.09397266571191146, - "grad_norm": 6.7749409675598145, - "learning_rate": 4.843378890480148e-05, - "loss": 0.8426, - "step": 10630 - }, - { - "epoch": 0.0940610689722237, - "grad_norm": 6.551861763000488, - "learning_rate": 4.843231551712961e-05, - "loss": 0.7469, - "step": 10640 - }, - { - "epoch": 0.09414947223253593, - "grad_norm": 6.459712982177734, - "learning_rate": 4.8430842129457736e-05, - "loss": 0.8874, - "step": 10650 - }, - { - "epoch": 0.09423787549284818, - "grad_norm": 8.966519355773926, - "learning_rate": 4.842936874178587e-05, - "loss": 0.961, - "step": 10660 - }, - { - "epoch": 0.09432627875316042, - "grad_norm": 4.387051582336426, - "learning_rate": 4.842789535411399e-05, - "loss": 0.857, - "step": 10670 - }, - { - "epoch": 0.09441468201347265, - "grad_norm": 5.910558223724365, - "learning_rate": 4.842642196644213e-05, - "loss": 0.8454, - "step": 10680 - }, - { - "epoch": 0.0945030852737849, - "grad_norm": 7.334143161773682, - "learning_rate": 4.842494857877025e-05, - "loss": 0.8719, - "step": 10690 - }, - { - "epoch": 0.09459148853409714, - "grad_norm": 3.0009491443634033, - "learning_rate": 4.8423475191098385e-05, - "loss": 0.9587, - "step": 10700 - }, - { - "epoch": 0.09467989179440937, - "grad_norm": 5.048775672912598, - "learning_rate": 4.842200180342651e-05, - "loss": 0.7913, - "step": 10710 - }, - { - "epoch": 0.09476829505472162, - "grad_norm": 9.675728797912598, - "learning_rate": 4.842052841575464e-05, - "loss": 0.9415, - "step": 10720 - }, - { - "epoch": 0.09485669831503386, - "grad_norm": 6.40328311920166, - "learning_rate": 4.841905502808277e-05, - "loss": 0.883, - "step": 10730 - }, - { - "epoch": 0.0949451015753461, - "grad_norm": 2.5814383029937744, - "learning_rate": 4.8417581640410905e-05, - "loss": 0.8322, - "step": 10740 - }, - { - "epoch": 0.09503350483565834, - "grad_norm": 7.165457248687744, - "learning_rate": 4.8416108252739026e-05, - "loss": 0.8912, - "step": 10750 - }, - { - "epoch": 0.09512190809597058, - "grad_norm": 7.884331703186035, - "learning_rate": 4.841463486506716e-05, - "loss": 0.807, - "step": 10760 - }, - { - "epoch": 0.09521031135628281, - "grad_norm": 3.5740268230438232, - "learning_rate": 4.841316147739529e-05, - "loss": 0.8781, - "step": 10770 - }, - { - "epoch": 0.09529871461659506, - "grad_norm": 5.46319055557251, - "learning_rate": 4.841168808972342e-05, - "loss": 0.8125, - "step": 10780 - }, - { - "epoch": 0.0953871178769073, - "grad_norm": 4.8293585777282715, - "learning_rate": 4.841021470205155e-05, - "loss": 0.7758, - "step": 10790 - }, - { - "epoch": 0.09547552113721954, - "grad_norm": 3.7307755947113037, - "learning_rate": 4.8408741314379675e-05, - "loss": 0.9022, - "step": 10800 - }, - { - "epoch": 0.09556392439753178, - "grad_norm": 6.22001838684082, - "learning_rate": 4.84072679267078e-05, - "loss": 0.8317, - "step": 10810 - }, - { - "epoch": 0.09565232765784402, - "grad_norm": 7.880277633666992, - "learning_rate": 4.840579453903594e-05, - "loss": 0.8278, - "step": 10820 - }, - { - "epoch": 0.09574073091815626, - "grad_norm": 7.64493465423584, - "learning_rate": 4.840432115136406e-05, - "loss": 0.9138, - "step": 10830 - }, - { - "epoch": 0.0958291341784685, - "grad_norm": 3.439091920852661, - "learning_rate": 4.8402847763692195e-05, - "loss": 0.928, - "step": 10840 - }, - { - "epoch": 0.09591753743878075, - "grad_norm": 5.199951648712158, - "learning_rate": 4.8401374376020324e-05, - "loss": 0.7234, - "step": 10850 - }, - { - "epoch": 0.09600594069909298, - "grad_norm": 5.242109298706055, - "learning_rate": 4.839990098834845e-05, - "loss": 0.8521, - "step": 10860 - }, - { - "epoch": 0.09609434395940522, - "grad_norm": 8.574398040771484, - "learning_rate": 4.839842760067658e-05, - "loss": 0.8285, - "step": 10870 - }, - { - "epoch": 0.09618274721971747, - "grad_norm": 7.547273635864258, - "learning_rate": 4.8396954213004715e-05, - "loss": 0.8114, - "step": 10880 - }, - { - "epoch": 0.09627115048002971, - "grad_norm": 5.9737114906311035, - "learning_rate": 4.839548082533284e-05, - "loss": 0.8522, - "step": 10890 - }, - { - "epoch": 0.09635955374034194, - "grad_norm": 6.698936462402344, - "learning_rate": 4.839400743766097e-05, - "loss": 0.9251, - "step": 10900 - }, - { - "epoch": 0.09644795700065419, - "grad_norm": 7.054299831390381, - "learning_rate": 4.8392534049989094e-05, - "loss": 0.9101, - "step": 10910 - }, - { - "epoch": 0.09653636026096643, - "grad_norm": 3.2542550563812256, - "learning_rate": 4.839106066231723e-05, - "loss": 0.9316, - "step": 10920 - }, - { - "epoch": 0.09662476352127866, - "grad_norm": 4.418131351470947, - "learning_rate": 4.838958727464536e-05, - "loss": 0.8635, - "step": 10930 - }, - { - "epoch": 0.0967131667815909, - "grad_norm": 6.187062740325928, - "learning_rate": 4.8388113886973486e-05, - "loss": 0.8683, - "step": 10940 - }, - { - "epoch": 0.09680157004190315, - "grad_norm": 4.491509914398193, - "learning_rate": 4.8386640499301614e-05, - "loss": 0.7484, - "step": 10950 - }, - { - "epoch": 0.09688997330221538, - "grad_norm": 3.0069119930267334, - "learning_rate": 4.838516711162975e-05, - "loss": 0.8678, - "step": 10960 - }, - { - "epoch": 0.09697837656252763, - "grad_norm": 6.826173305511475, - "learning_rate": 4.838369372395787e-05, - "loss": 0.9782, - "step": 10970 - }, - { - "epoch": 0.09706677982283987, - "grad_norm": 6.12066650390625, - "learning_rate": 4.8382220336286006e-05, - "loss": 0.7994, - "step": 10980 - }, - { - "epoch": 0.0971551830831521, - "grad_norm": 5.961526393890381, - "learning_rate": 4.8380746948614134e-05, - "loss": 1.0027, - "step": 10990 - }, - { - "epoch": 0.09724358634346435, - "grad_norm": 8.005057334899902, - "learning_rate": 4.837927356094226e-05, - "loss": 0.9393, - "step": 11000 - }, - { - "epoch": 0.09733198960377659, - "grad_norm": 5.7164764404296875, - "learning_rate": 4.837780017327039e-05, - "loss": 0.9266, - "step": 11010 - }, - { - "epoch": 0.09742039286408882, - "grad_norm": 1.6349455118179321, - "learning_rate": 4.837632678559852e-05, - "loss": 0.7811, - "step": 11020 - }, - { - "epoch": 0.09750879612440107, - "grad_norm": 3.9553287029266357, - "learning_rate": 4.837485339792665e-05, - "loss": 0.9023, - "step": 11030 - }, - { - "epoch": 0.09759719938471331, - "grad_norm": 3.272874355316162, - "learning_rate": 4.837338001025478e-05, - "loss": 0.7611, - "step": 11040 - }, - { - "epoch": 0.09768560264502554, - "grad_norm": 8.173721313476562, - "learning_rate": 4.837190662258291e-05, - "loss": 0.9666, - "step": 11050 - }, - { - "epoch": 0.09777400590533779, - "grad_norm": 5.934161186218262, - "learning_rate": 4.837043323491104e-05, - "loss": 0.8923, - "step": 11060 - }, - { - "epoch": 0.09786240916565003, - "grad_norm": 5.214540004730225, - "learning_rate": 4.836895984723917e-05, - "loss": 0.8765, - "step": 11070 - }, - { - "epoch": 0.09795081242596226, - "grad_norm": 2.524299144744873, - "learning_rate": 4.8367486459567296e-05, - "loss": 0.9086, - "step": 11080 - }, - { - "epoch": 0.09803921568627451, - "grad_norm": 5.146005153656006, - "learning_rate": 4.8366013071895424e-05, - "loss": 0.7716, - "step": 11090 - }, - { - "epoch": 0.09812761894658675, - "grad_norm": 4.304093837738037, - "learning_rate": 4.836453968422356e-05, - "loss": 0.8965, - "step": 11100 - }, - { - "epoch": 0.09821602220689898, - "grad_norm": 4.954878807067871, - "learning_rate": 4.836306629655169e-05, - "loss": 0.962, - "step": 11110 - }, - { - "epoch": 0.09830442546721123, - "grad_norm": 6.129110336303711, - "learning_rate": 4.8361592908879816e-05, - "loss": 0.9413, - "step": 11120 - }, - { - "epoch": 0.09839282872752347, - "grad_norm": 7.776825904846191, - "learning_rate": 4.8360119521207945e-05, - "loss": 0.8564, - "step": 11130 - }, - { - "epoch": 0.0984812319878357, - "grad_norm": 4.105815887451172, - "learning_rate": 4.835864613353607e-05, - "loss": 0.7796, - "step": 11140 - }, - { - "epoch": 0.09856963524814795, - "grad_norm": 4.403508186340332, - "learning_rate": 4.83571727458642e-05, - "loss": 1.0089, - "step": 11150 - }, - { - "epoch": 0.0986580385084602, - "grad_norm": 3.8178341388702393, - "learning_rate": 4.835569935819233e-05, - "loss": 1.0437, - "step": 11160 - }, - { - "epoch": 0.09874644176877244, - "grad_norm": 5.4979753494262695, - "learning_rate": 4.8354225970520465e-05, - "loss": 0.961, - "step": 11170 - }, - { - "epoch": 0.09883484502908467, - "grad_norm": 6.354760646820068, - "learning_rate": 4.835275258284859e-05, - "loss": 0.8086, - "step": 11180 - }, - { - "epoch": 0.09892324828939691, - "grad_norm": 3.0523722171783447, - "learning_rate": 4.835127919517672e-05, - "loss": 0.8666, - "step": 11190 - }, - { - "epoch": 0.09901165154970916, - "grad_norm": 6.463064193725586, - "learning_rate": 4.834980580750485e-05, - "loss": 0.8575, - "step": 11200 - }, - { - "epoch": 0.09910005481002139, - "grad_norm": 3.113537549972534, - "learning_rate": 4.834833241983298e-05, - "loss": 0.7764, - "step": 11210 - }, - { - "epoch": 0.09918845807033363, - "grad_norm": 6.625050067901611, - "learning_rate": 4.8346859032161107e-05, - "loss": 0.9465, - "step": 11220 - }, - { - "epoch": 0.09927686133064588, - "grad_norm": 3.369560480117798, - "learning_rate": 4.834538564448924e-05, - "loss": 0.8409, - "step": 11230 - }, - { - "epoch": 0.09936526459095811, - "grad_norm": 2.803184986114502, - "learning_rate": 4.834391225681737e-05, - "loss": 0.8473, - "step": 11240 - }, - { - "epoch": 0.09945366785127036, - "grad_norm": 2.738548517227173, - "learning_rate": 4.83424388691455e-05, - "loss": 0.8101, - "step": 11250 - }, - { - "epoch": 0.0995420711115826, - "grad_norm": 4.669358730316162, - "learning_rate": 4.834096548147363e-05, - "loss": 0.9251, - "step": 11260 - }, - { - "epoch": 0.09963047437189483, - "grad_norm": 10.148649215698242, - "learning_rate": 4.8339492093801755e-05, - "loss": 0.9198, - "step": 11270 - }, - { - "epoch": 0.09971887763220708, - "grad_norm": 9.202265739440918, - "learning_rate": 4.8338018706129883e-05, - "loss": 0.9573, - "step": 11280 - }, - { - "epoch": 0.09980728089251932, - "grad_norm": 6.037299633026123, - "learning_rate": 4.833654531845802e-05, - "loss": 0.7422, - "step": 11290 - }, - { - "epoch": 0.09989568415283155, - "grad_norm": 8.946399688720703, - "learning_rate": 4.833507193078614e-05, - "loss": 0.8829, - "step": 11300 - }, - { - "epoch": 0.0999840874131438, - "grad_norm": 6.282362937927246, - "learning_rate": 4.8333598543114275e-05, - "loss": 0.9511, - "step": 11310 - }, - { - "epoch": 0.10007249067345604, - "grad_norm": 5.291918754577637, - "learning_rate": 4.8332125155442404e-05, - "loss": 0.9102, - "step": 11320 - }, - { - "epoch": 0.10016089393376827, - "grad_norm": 4.973713397979736, - "learning_rate": 4.833065176777053e-05, - "loss": 0.9344, - "step": 11330 - }, - { - "epoch": 0.10024929719408052, - "grad_norm": 4.601949691772461, - "learning_rate": 4.832917838009866e-05, - "loss": 0.8408, - "step": 11340 - }, - { - "epoch": 0.10033770045439276, - "grad_norm": 7.364986896514893, - "learning_rate": 4.8327704992426795e-05, - "loss": 0.8399, - "step": 11350 - }, - { - "epoch": 0.10042610371470499, - "grad_norm": 5.818500995635986, - "learning_rate": 4.832623160475492e-05, - "loss": 0.7574, - "step": 11360 - }, - { - "epoch": 0.10051450697501724, - "grad_norm": 10.510309219360352, - "learning_rate": 4.832475821708305e-05, - "loss": 0.861, - "step": 11370 - }, - { - "epoch": 0.10060291023532948, - "grad_norm": 2.829066514968872, - "learning_rate": 4.8323284829411174e-05, - "loss": 0.8238, - "step": 11380 - }, - { - "epoch": 0.10069131349564171, - "grad_norm": 12.972811698913574, - "learning_rate": 4.832181144173931e-05, - "loss": 0.8737, - "step": 11390 - }, - { - "epoch": 0.10077971675595396, - "grad_norm": 6.312592506408691, - "learning_rate": 4.832033805406744e-05, - "loss": 0.8533, - "step": 11400 - }, - { - "epoch": 0.1008681200162662, - "grad_norm": 3.509385108947754, - "learning_rate": 4.8318864666395566e-05, - "loss": 0.836, - "step": 11410 - }, - { - "epoch": 0.10095652327657845, - "grad_norm": 4.647274494171143, - "learning_rate": 4.8317391278723694e-05, - "loss": 0.8366, - "step": 11420 - }, - { - "epoch": 0.10104492653689068, - "grad_norm": 12.710670471191406, - "learning_rate": 4.831591789105183e-05, - "loss": 0.8516, - "step": 11430 - }, - { - "epoch": 0.10113332979720292, - "grad_norm": 4.196151256561279, - "learning_rate": 4.831444450337995e-05, - "loss": 0.8707, - "step": 11440 - }, - { - "epoch": 0.10122173305751517, - "grad_norm": 3.9043242931365967, - "learning_rate": 4.8312971115708086e-05, - "loss": 0.9092, - "step": 11450 - }, - { - "epoch": 0.1013101363178274, - "grad_norm": 5.994088649749756, - "learning_rate": 4.8311497728036214e-05, - "loss": 0.9521, - "step": 11460 - }, - { - "epoch": 0.10139853957813964, - "grad_norm": 4.444270610809326, - "learning_rate": 4.831002434036434e-05, - "loss": 0.9715, - "step": 11470 - }, - { - "epoch": 0.10148694283845189, - "grad_norm": 6.034897804260254, - "learning_rate": 4.830855095269247e-05, - "loss": 0.8756, - "step": 11480 - }, - { - "epoch": 0.10157534609876412, - "grad_norm": 6.750916004180908, - "learning_rate": 4.83070775650206e-05, - "loss": 0.8579, - "step": 11490 - }, - { - "epoch": 0.10166374935907636, - "grad_norm": 4.86402702331543, - "learning_rate": 4.830560417734873e-05, - "loss": 0.8653, - "step": 11500 - }, - { - "epoch": 0.10175215261938861, - "grad_norm": 5.484646797180176, - "learning_rate": 4.830413078967686e-05, - "loss": 0.8304, - "step": 11510 - }, - { - "epoch": 0.10184055587970084, - "grad_norm": 10.572221755981445, - "learning_rate": 4.8302657402004984e-05, - "loss": 0.9065, - "step": 11520 - }, - { - "epoch": 0.10192895914001308, - "grad_norm": 4.579751014709473, - "learning_rate": 4.830118401433312e-05, - "loss": 0.9681, - "step": 11530 - }, - { - "epoch": 0.10201736240032533, - "grad_norm": 3.4729080200195312, - "learning_rate": 4.829971062666125e-05, - "loss": 0.8756, - "step": 11540 - }, - { - "epoch": 0.10210576566063756, - "grad_norm": 6.62471342086792, - "learning_rate": 4.8298237238989376e-05, - "loss": 0.8131, - "step": 11550 - }, - { - "epoch": 0.1021941689209498, - "grad_norm": 6.508872032165527, - "learning_rate": 4.8296763851317504e-05, - "loss": 1.0484, - "step": 11560 - }, - { - "epoch": 0.10228257218126205, - "grad_norm": 7.294726848602295, - "learning_rate": 4.829529046364564e-05, - "loss": 0.8732, - "step": 11570 - }, - { - "epoch": 0.10237097544157428, - "grad_norm": 6.782046318054199, - "learning_rate": 4.829381707597376e-05, - "loss": 0.9028, - "step": 11580 - }, - { - "epoch": 0.10245937870188652, - "grad_norm": 5.460529804229736, - "learning_rate": 4.8292343688301896e-05, - "loss": 0.949, - "step": 11590 - }, - { - "epoch": 0.10254778196219877, - "grad_norm": 5.221329689025879, - "learning_rate": 4.8290870300630025e-05, - "loss": 0.811, - "step": 11600 - }, - { - "epoch": 0.102636185222511, - "grad_norm": 5.675347805023193, - "learning_rate": 4.828939691295815e-05, - "loss": 0.9441, - "step": 11610 - }, - { - "epoch": 0.10272458848282325, - "grad_norm": 6.541971206665039, - "learning_rate": 4.828792352528628e-05, - "loss": 0.8346, - "step": 11620 - }, - { - "epoch": 0.10281299174313549, - "grad_norm": 2.437593936920166, - "learning_rate": 4.828645013761441e-05, - "loss": 0.8141, - "step": 11630 - }, - { - "epoch": 0.10290139500344772, - "grad_norm": 4.701081275939941, - "learning_rate": 4.828497674994254e-05, - "loss": 0.7186, - "step": 11640 - }, - { - "epoch": 0.10298979826375997, - "grad_norm": 8.1825532913208, - "learning_rate": 4.828350336227067e-05, - "loss": 0.9811, - "step": 11650 - }, - { - "epoch": 0.10307820152407221, - "grad_norm": 4.1160125732421875, - "learning_rate": 4.8282029974598795e-05, - "loss": 0.939, - "step": 11660 - }, - { - "epoch": 0.10316660478438444, - "grad_norm": 7.095728397369385, - "learning_rate": 4.828055658692693e-05, - "loss": 0.7836, - "step": 11670 - }, - { - "epoch": 0.10325500804469669, - "grad_norm": 10.679615020751953, - "learning_rate": 4.827908319925506e-05, - "loss": 0.8775, - "step": 11680 - }, - { - "epoch": 0.10334341130500893, - "grad_norm": 3.686213970184326, - "learning_rate": 4.8277609811583187e-05, - "loss": 0.8241, - "step": 11690 - }, - { - "epoch": 0.10343181456532118, - "grad_norm": 4.497840404510498, - "learning_rate": 4.8276136423911315e-05, - "loss": 0.795, - "step": 11700 - }, - { - "epoch": 0.1035202178256334, - "grad_norm": 5.162242889404297, - "learning_rate": 4.827466303623945e-05, - "loss": 0.9048, - "step": 11710 - }, - { - "epoch": 0.10360862108594565, - "grad_norm": 8.991860389709473, - "learning_rate": 4.827318964856757e-05, - "loss": 0.9184, - "step": 11720 - }, - { - "epoch": 0.1036970243462579, - "grad_norm": 5.413891315460205, - "learning_rate": 4.827171626089571e-05, - "loss": 0.8428, - "step": 11730 - }, - { - "epoch": 0.10378542760657013, - "grad_norm": 6.8643412590026855, - "learning_rate": 4.827024287322383e-05, - "loss": 0.9267, - "step": 11740 - }, - { - "epoch": 0.10387383086688237, - "grad_norm": 4.406735897064209, - "learning_rate": 4.8268769485551963e-05, - "loss": 0.9461, - "step": 11750 - }, - { - "epoch": 0.10396223412719462, - "grad_norm": 5.080199241638184, - "learning_rate": 4.826729609788009e-05, - "loss": 0.8362, - "step": 11760 - }, - { - "epoch": 0.10405063738750685, - "grad_norm": 4.05060338973999, - "learning_rate": 4.826582271020822e-05, - "loss": 0.7967, - "step": 11770 - }, - { - "epoch": 0.10413904064781909, - "grad_norm": 5.957949638366699, - "learning_rate": 4.826434932253635e-05, - "loss": 0.8772, - "step": 11780 - }, - { - "epoch": 0.10422744390813134, - "grad_norm": 3.757291555404663, - "learning_rate": 4.8262875934864484e-05, - "loss": 0.8104, - "step": 11790 - }, - { - "epoch": 0.10431584716844357, - "grad_norm": 6.859879016876221, - "learning_rate": 4.8261402547192605e-05, - "loss": 0.8481, - "step": 11800 - }, - { - "epoch": 0.10440425042875581, - "grad_norm": 3.6826179027557373, - "learning_rate": 4.825992915952074e-05, - "loss": 0.8707, - "step": 11810 - }, - { - "epoch": 0.10449265368906806, - "grad_norm": 2.7624754905700684, - "learning_rate": 4.825845577184887e-05, - "loss": 0.7285, - "step": 11820 - }, - { - "epoch": 0.10458105694938029, - "grad_norm": 4.933928966522217, - "learning_rate": 4.8256982384177e-05, - "loss": 0.8739, - "step": 11830 - }, - { - "epoch": 0.10466946020969253, - "grad_norm": 6.146503925323486, - "learning_rate": 4.8255508996505125e-05, - "loss": 0.8291, - "step": 11840 - }, - { - "epoch": 0.10475786347000478, - "grad_norm": 5.96083402633667, - "learning_rate": 4.8254035608833254e-05, - "loss": 0.7992, - "step": 11850 - }, - { - "epoch": 0.10484626673031701, - "grad_norm": 3.181730031967163, - "learning_rate": 4.825256222116138e-05, - "loss": 0.8775, - "step": 11860 - }, - { - "epoch": 0.10493466999062925, - "grad_norm": 4.656289100646973, - "learning_rate": 4.825108883348952e-05, - "loss": 0.9216, - "step": 11870 - }, - { - "epoch": 0.1050230732509415, - "grad_norm": 3.093475103378296, - "learning_rate": 4.824961544581764e-05, - "loss": 0.8623, - "step": 11880 - }, - { - "epoch": 0.10511147651125373, - "grad_norm": 10.09500503540039, - "learning_rate": 4.8248142058145774e-05, - "loss": 0.8994, - "step": 11890 - }, - { - "epoch": 0.10519987977156597, - "grad_norm": 8.591645240783691, - "learning_rate": 4.82466686704739e-05, - "loss": 0.7397, - "step": 11900 - }, - { - "epoch": 0.10528828303187822, - "grad_norm": 5.774130344390869, - "learning_rate": 4.824519528280203e-05, - "loss": 0.8121, - "step": 11910 - }, - { - "epoch": 0.10537668629219045, - "grad_norm": 4.944594383239746, - "learning_rate": 4.824372189513016e-05, - "loss": 0.7577, - "step": 11920 - }, - { - "epoch": 0.1054650895525027, - "grad_norm": 10.666511535644531, - "learning_rate": 4.8242248507458294e-05, - "loss": 0.8921, - "step": 11930 - }, - { - "epoch": 0.10555349281281494, - "grad_norm": 4.933461666107178, - "learning_rate": 4.8240775119786416e-05, - "loss": 0.9285, - "step": 11940 - }, - { - "epoch": 0.10564189607312717, - "grad_norm": 5.876432418823242, - "learning_rate": 4.823930173211455e-05, - "loss": 0.9049, - "step": 11950 - }, - { - "epoch": 0.10573029933343941, - "grad_norm": 2.357940673828125, - "learning_rate": 4.823782834444268e-05, - "loss": 0.8446, - "step": 11960 - }, - { - "epoch": 0.10581870259375166, - "grad_norm": 11.82457447052002, - "learning_rate": 4.823635495677081e-05, - "loss": 0.9313, - "step": 11970 - }, - { - "epoch": 0.1059071058540639, - "grad_norm": 7.283470153808594, - "learning_rate": 4.8234881569098936e-05, - "loss": 0.7034, - "step": 11980 - }, - { - "epoch": 0.10599550911437614, - "grad_norm": 5.469303131103516, - "learning_rate": 4.8233408181427064e-05, - "loss": 0.7876, - "step": 11990 - }, - { - "epoch": 0.10608391237468838, - "grad_norm": 6.705424785614014, - "learning_rate": 4.823193479375519e-05, - "loss": 0.81, - "step": 12000 - }, - { - "epoch": 0.10617231563500062, - "grad_norm": 11.108362197875977, - "learning_rate": 4.823046140608333e-05, - "loss": 0.8724, - "step": 12010 - }, - { - "epoch": 0.10626071889531286, - "grad_norm": 3.9691977500915527, - "learning_rate": 4.8228988018411456e-05, - "loss": 0.8841, - "step": 12020 - }, - { - "epoch": 0.1063491221556251, - "grad_norm": 4.791903972625732, - "learning_rate": 4.8227514630739585e-05, - "loss": 0.888, - "step": 12030 - }, - { - "epoch": 0.10643752541593735, - "grad_norm": 3.9145760536193848, - "learning_rate": 4.822604124306771e-05, - "loss": 0.8979, - "step": 12040 - }, - { - "epoch": 0.10652592867624958, - "grad_norm": 7.0544281005859375, - "learning_rate": 4.822456785539584e-05, - "loss": 0.7288, - "step": 12050 - }, - { - "epoch": 0.10661433193656182, - "grad_norm": 5.192449569702148, - "learning_rate": 4.822309446772397e-05, - "loss": 0.9639, - "step": 12060 - }, - { - "epoch": 0.10670273519687407, - "grad_norm": 5.621539115905762, - "learning_rate": 4.8221621080052105e-05, - "loss": 0.8467, - "step": 12070 - }, - { - "epoch": 0.1067911384571863, - "grad_norm": 4.238648891448975, - "learning_rate": 4.822014769238023e-05, - "loss": 0.8116, - "step": 12080 - }, - { - "epoch": 0.10687954171749854, - "grad_norm": 5.105109214782715, - "learning_rate": 4.821867430470836e-05, - "loss": 0.9827, - "step": 12090 - }, - { - "epoch": 0.10696794497781079, - "grad_norm": 5.218087196350098, - "learning_rate": 4.821720091703649e-05, - "loss": 0.9326, - "step": 12100 - }, - { - "epoch": 0.10705634823812302, - "grad_norm": 9.325528144836426, - "learning_rate": 4.821572752936462e-05, - "loss": 0.9618, - "step": 12110 - }, - { - "epoch": 0.10714475149843526, - "grad_norm": 4.683792591094971, - "learning_rate": 4.8214254141692746e-05, - "loss": 0.8442, - "step": 12120 - }, - { - "epoch": 0.1072331547587475, - "grad_norm": 5.67968225479126, - "learning_rate": 4.8212780754020875e-05, - "loss": 0.8684, - "step": 12130 - }, - { - "epoch": 0.10732155801905974, - "grad_norm": 5.964956760406494, - "learning_rate": 4.821130736634901e-05, - "loss": 0.8322, - "step": 12140 - }, - { - "epoch": 0.10740996127937198, - "grad_norm": 4.661000728607178, - "learning_rate": 4.820983397867714e-05, - "loss": 0.8833, - "step": 12150 - }, - { - "epoch": 0.10749836453968423, - "grad_norm": 7.7476420402526855, - "learning_rate": 4.820836059100527e-05, - "loss": 0.9018, - "step": 12160 - }, - { - "epoch": 0.10758676779999646, - "grad_norm": 3.875717878341675, - "learning_rate": 4.8206887203333395e-05, - "loss": 0.8409, - "step": 12170 - }, - { - "epoch": 0.1076751710603087, - "grad_norm": 3.472538948059082, - "learning_rate": 4.820541381566152e-05, - "loss": 0.8306, - "step": 12180 - }, - { - "epoch": 0.10776357432062095, - "grad_norm": 5.734470367431641, - "learning_rate": 4.820394042798965e-05, - "loss": 0.9054, - "step": 12190 - }, - { - "epoch": 0.10785197758093318, - "grad_norm": 3.783080577850342, - "learning_rate": 4.820246704031779e-05, - "loss": 0.833, - "step": 12200 - }, - { - "epoch": 0.10794038084124542, - "grad_norm": 7.443694114685059, - "learning_rate": 4.820099365264591e-05, - "loss": 0.9199, - "step": 12210 - }, - { - "epoch": 0.10802878410155767, - "grad_norm": 7.434304714202881, - "learning_rate": 4.8199520264974044e-05, - "loss": 0.8747, - "step": 12220 - }, - { - "epoch": 0.10811718736186991, - "grad_norm": 6.162380695343018, - "learning_rate": 4.819804687730217e-05, - "loss": 0.9705, - "step": 12230 - }, - { - "epoch": 0.10820559062218214, - "grad_norm": 7.422550201416016, - "learning_rate": 4.81965734896303e-05, - "loss": 0.8789, - "step": 12240 - }, - { - "epoch": 0.10829399388249439, - "grad_norm": 3.3764584064483643, - "learning_rate": 4.819510010195843e-05, - "loss": 0.8193, - "step": 12250 - }, - { - "epoch": 0.10838239714280663, - "grad_norm": 5.908439636230469, - "learning_rate": 4.8193626714286564e-05, - "loss": 0.9523, - "step": 12260 - }, - { - "epoch": 0.10847080040311886, - "grad_norm": 5.220740795135498, - "learning_rate": 4.8192153326614685e-05, - "loss": 0.7079, - "step": 12270 - }, - { - "epoch": 0.10855920366343111, - "grad_norm": 4.891901969909668, - "learning_rate": 4.819067993894282e-05, - "loss": 0.9311, - "step": 12280 - }, - { - "epoch": 0.10864760692374335, - "grad_norm": 2.5737478733062744, - "learning_rate": 4.818920655127095e-05, - "loss": 0.8561, - "step": 12290 - }, - { - "epoch": 0.10873601018405558, - "grad_norm": 3.4949209690093994, - "learning_rate": 4.818773316359908e-05, - "loss": 0.8153, - "step": 12300 - }, - { - "epoch": 0.10882441344436783, - "grad_norm": 4.714328289031982, - "learning_rate": 4.8186259775927206e-05, - "loss": 0.7879, - "step": 12310 - }, - { - "epoch": 0.10891281670468007, - "grad_norm": 5.5439958572387695, - "learning_rate": 4.8184786388255334e-05, - "loss": 0.8124, - "step": 12320 - }, - { - "epoch": 0.1090012199649923, - "grad_norm": 3.950995683670044, - "learning_rate": 4.818331300058346e-05, - "loss": 0.9348, - "step": 12330 - }, - { - "epoch": 0.10908962322530455, - "grad_norm": 3.3742518424987793, - "learning_rate": 4.81818396129116e-05, - "loss": 0.8962, - "step": 12340 - }, - { - "epoch": 0.1091780264856168, - "grad_norm": 3.243555784225464, - "learning_rate": 4.818036622523972e-05, - "loss": 0.868, - "step": 12350 - }, - { - "epoch": 0.10926642974592903, - "grad_norm": 5.580383777618408, - "learning_rate": 4.8178892837567854e-05, - "loss": 0.941, - "step": 12360 - }, - { - "epoch": 0.10935483300624127, - "grad_norm": 3.664823532104492, - "learning_rate": 4.817741944989598e-05, - "loss": 0.7888, - "step": 12370 - }, - { - "epoch": 0.10944323626655351, - "grad_norm": 4.724151134490967, - "learning_rate": 4.817594606222411e-05, - "loss": 0.8237, - "step": 12380 - }, - { - "epoch": 0.10953163952686575, - "grad_norm": 2.60208797454834, - "learning_rate": 4.817447267455224e-05, - "loss": 0.8005, - "step": 12390 - }, - { - "epoch": 0.10962004278717799, - "grad_norm": 7.287562370300293, - "learning_rate": 4.8172999286880374e-05, - "loss": 0.8841, - "step": 12400 - }, - { - "epoch": 0.10970844604749024, - "grad_norm": 5.459963321685791, - "learning_rate": 4.8171525899208496e-05, - "loss": 0.8946, - "step": 12410 - }, - { - "epoch": 0.10979684930780247, - "grad_norm": 5.479110240936279, - "learning_rate": 4.817005251153663e-05, - "loss": 0.8602, - "step": 12420 - }, - { - "epoch": 0.10988525256811471, - "grad_norm": 7.66988468170166, - "learning_rate": 4.816857912386475e-05, - "loss": 0.8133, - "step": 12430 - }, - { - "epoch": 0.10997365582842696, - "grad_norm": 5.17045783996582, - "learning_rate": 4.816710573619289e-05, - "loss": 0.6647, - "step": 12440 - }, - { - "epoch": 0.11006205908873919, - "grad_norm": 7.7116241455078125, - "learning_rate": 4.8165632348521016e-05, - "loss": 0.9312, - "step": 12450 - }, - { - "epoch": 0.11015046234905143, - "grad_norm": 5.106606483459473, - "learning_rate": 4.8164158960849144e-05, - "loss": 0.9675, - "step": 12460 - }, - { - "epoch": 0.11023886560936368, - "grad_norm": 4.0043721199035645, - "learning_rate": 4.816268557317727e-05, - "loss": 0.8789, - "step": 12470 - }, - { - "epoch": 0.1103272688696759, - "grad_norm": 3.060173273086548, - "learning_rate": 4.816121218550541e-05, - "loss": 0.7391, - "step": 12480 - }, - { - "epoch": 0.11041567212998815, - "grad_norm": 6.218465328216553, - "learning_rate": 4.815973879783353e-05, - "loss": 0.7834, - "step": 12490 - }, - { - "epoch": 0.1105040753903004, - "grad_norm": 6.211796283721924, - "learning_rate": 4.8158265410161665e-05, - "loss": 0.8404, - "step": 12500 - }, - { - "epoch": 0.11059247865061264, - "grad_norm": 10.186081886291504, - "learning_rate": 4.815679202248979e-05, - "loss": 0.8758, - "step": 12510 - }, - { - "epoch": 0.11068088191092487, - "grad_norm": 4.9329962730407715, - "learning_rate": 4.815531863481792e-05, - "loss": 0.9294, - "step": 12520 - }, - { - "epoch": 0.11076928517123712, - "grad_norm": 6.29591703414917, - "learning_rate": 4.815384524714605e-05, - "loss": 0.9319, - "step": 12530 - }, - { - "epoch": 0.11085768843154936, - "grad_norm": 4.162048816680908, - "learning_rate": 4.8152371859474185e-05, - "loss": 0.8672, - "step": 12540 - }, - { - "epoch": 0.11094609169186159, - "grad_norm": 5.286823272705078, - "learning_rate": 4.8150898471802306e-05, - "loss": 0.8435, - "step": 12550 - }, - { - "epoch": 0.11103449495217384, - "grad_norm": 13.852834701538086, - "learning_rate": 4.814942508413044e-05, - "loss": 0.922, - "step": 12560 - }, - { - "epoch": 0.11112289821248608, - "grad_norm": 7.8984270095825195, - "learning_rate": 4.814795169645856e-05, - "loss": 0.8533, - "step": 12570 - }, - { - "epoch": 0.11121130147279831, - "grad_norm": 3.034083604812622, - "learning_rate": 4.81464783087867e-05, - "loss": 0.7907, - "step": 12580 - }, - { - "epoch": 0.11129970473311056, - "grad_norm": 14.976812362670898, - "learning_rate": 4.8145004921114827e-05, - "loss": 0.9485, - "step": 12590 - }, - { - "epoch": 0.1113881079934228, - "grad_norm": 6.471790313720703, - "learning_rate": 4.8143531533442955e-05, - "loss": 0.8911, - "step": 12600 - }, - { - "epoch": 0.11147651125373503, - "grad_norm": 5.903090953826904, - "learning_rate": 4.814205814577108e-05, - "loss": 0.8458, - "step": 12610 - }, - { - "epoch": 0.11156491451404728, - "grad_norm": 3.115103244781494, - "learning_rate": 4.814058475809922e-05, - "loss": 0.6789, - "step": 12620 - }, - { - "epoch": 0.11165331777435952, - "grad_norm": 3.257558822631836, - "learning_rate": 4.813911137042734e-05, - "loss": 0.891, - "step": 12630 - }, - { - "epoch": 0.11174172103467175, - "grad_norm": 4.755401134490967, - "learning_rate": 4.8137637982755475e-05, - "loss": 0.8643, - "step": 12640 - }, - { - "epoch": 0.111830124294984, - "grad_norm": 6.717006683349609, - "learning_rate": 4.8136164595083603e-05, - "loss": 0.7305, - "step": 12650 - }, - { - "epoch": 0.11191852755529624, - "grad_norm": 6.6784234046936035, - "learning_rate": 4.813469120741173e-05, - "loss": 0.9982, - "step": 12660 - }, - { - "epoch": 0.11200693081560847, - "grad_norm": 6.589071273803711, - "learning_rate": 4.813321781973986e-05, - "loss": 0.7812, - "step": 12670 - }, - { - "epoch": 0.11209533407592072, - "grad_norm": 5.508271217346191, - "learning_rate": 4.813174443206799e-05, - "loss": 0.8973, - "step": 12680 - }, - { - "epoch": 0.11218373733623296, - "grad_norm": 12.26569938659668, - "learning_rate": 4.813027104439612e-05, - "loss": 0.8188, - "step": 12690 - }, - { - "epoch": 0.1122721405965452, - "grad_norm": 3.524712085723877, - "learning_rate": 4.812879765672425e-05, - "loss": 0.8285, - "step": 12700 - }, - { - "epoch": 0.11236054385685744, - "grad_norm": 12.058805465698242, - "learning_rate": 4.8127324269052374e-05, - "loss": 0.8275, - "step": 12710 - }, - { - "epoch": 0.11244894711716968, - "grad_norm": 9.321803092956543, - "learning_rate": 4.812585088138051e-05, - "loss": 0.9625, - "step": 12720 - }, - { - "epoch": 0.11253735037748192, - "grad_norm": 5.731935977935791, - "learning_rate": 4.812437749370864e-05, - "loss": 0.9214, - "step": 12730 - }, - { - "epoch": 0.11262575363779416, - "grad_norm": 5.440855979919434, - "learning_rate": 4.8122904106036765e-05, - "loss": 0.9237, - "step": 12740 - }, - { - "epoch": 0.1127141568981064, - "grad_norm": 4.87943172454834, - "learning_rate": 4.8121430718364894e-05, - "loss": 0.8803, - "step": 12750 - }, - { - "epoch": 0.11280256015841865, - "grad_norm": 9.990445137023926, - "learning_rate": 4.811995733069303e-05, - "loss": 0.7905, - "step": 12760 - }, - { - "epoch": 0.11289096341873088, - "grad_norm": 7.565195083618164, - "learning_rate": 4.811848394302115e-05, - "loss": 0.9275, - "step": 12770 - }, - { - "epoch": 0.11297936667904313, - "grad_norm": 9.698315620422363, - "learning_rate": 4.8117010555349286e-05, - "loss": 0.8425, - "step": 12780 - }, - { - "epoch": 0.11306776993935537, - "grad_norm": 4.264374732971191, - "learning_rate": 4.811553716767741e-05, - "loss": 0.8511, - "step": 12790 - }, - { - "epoch": 0.1131561731996676, - "grad_norm": 3.337890148162842, - "learning_rate": 4.811406378000554e-05, - "loss": 1.0133, - "step": 12800 - }, - { - "epoch": 0.11324457645997985, - "grad_norm": 3.5003468990325928, - "learning_rate": 4.811259039233367e-05, - "loss": 0.9532, - "step": 12810 - }, - { - "epoch": 0.11333297972029209, - "grad_norm": 3.5763087272644043, - "learning_rate": 4.81111170046618e-05, - "loss": 0.8774, - "step": 12820 - }, - { - "epoch": 0.11342138298060432, - "grad_norm": 5.544234275817871, - "learning_rate": 4.810964361698993e-05, - "loss": 0.8788, - "step": 12830 - }, - { - "epoch": 0.11350978624091657, - "grad_norm": 8.555781364440918, - "learning_rate": 4.810817022931806e-05, - "loss": 0.9446, - "step": 12840 - }, - { - "epoch": 0.11359818950122881, - "grad_norm": 2.7551257610321045, - "learning_rate": 4.8106696841646184e-05, - "loss": 0.9086, - "step": 12850 - }, - { - "epoch": 0.11368659276154104, - "grad_norm": 4.804686546325684, - "learning_rate": 4.810522345397432e-05, - "loss": 0.8292, - "step": 12860 - }, - { - "epoch": 0.11377499602185329, - "grad_norm": 6.941967487335205, - "learning_rate": 4.810375006630245e-05, - "loss": 0.7287, - "step": 12870 - }, - { - "epoch": 0.11386339928216553, - "grad_norm": 5.422636032104492, - "learning_rate": 4.8102276678630576e-05, - "loss": 0.7451, - "step": 12880 - }, - { - "epoch": 0.11395180254247776, - "grad_norm": 5.8049397468566895, - "learning_rate": 4.8100803290958704e-05, - "loss": 0.8179, - "step": 12890 - }, - { - "epoch": 0.11404020580279, - "grad_norm": 4.307275772094727, - "learning_rate": 4.809932990328683e-05, - "loss": 0.7998, - "step": 12900 - }, - { - "epoch": 0.11412860906310225, - "grad_norm": 5.677903175354004, - "learning_rate": 4.809785651561496e-05, - "loss": 0.9389, - "step": 12910 - }, - { - "epoch": 0.11421701232341448, - "grad_norm": 4.883234977722168, - "learning_rate": 4.8096383127943096e-05, - "loss": 1.0766, - "step": 12920 - }, - { - "epoch": 0.11430541558372673, - "grad_norm": 6.127511501312256, - "learning_rate": 4.8094909740271224e-05, - "loss": 0.8464, - "step": 12930 - }, - { - "epoch": 0.11439381884403897, - "grad_norm": 3.314553737640381, - "learning_rate": 4.809343635259935e-05, - "loss": 0.841, - "step": 12940 - }, - { - "epoch": 0.1144822221043512, - "grad_norm": 9.716215133666992, - "learning_rate": 4.809196296492748e-05, - "loss": 0.828, - "step": 12950 - }, - { - "epoch": 0.11457062536466345, - "grad_norm": 3.888396739959717, - "learning_rate": 4.809048957725561e-05, - "loss": 0.7595, - "step": 12960 - }, - { - "epoch": 0.11465902862497569, - "grad_norm": 10.254668235778809, - "learning_rate": 4.808901618958374e-05, - "loss": 0.806, - "step": 12970 - }, - { - "epoch": 0.11474743188528792, - "grad_norm": 5.166515350341797, - "learning_rate": 4.808754280191187e-05, - "loss": 0.8192, - "step": 12980 - }, - { - "epoch": 0.11483583514560017, - "grad_norm": 6.8301920890808105, - "learning_rate": 4.808606941424e-05, - "loss": 0.8468, - "step": 12990 - }, - { - "epoch": 0.11492423840591241, - "grad_norm": 6.361786365509033, - "learning_rate": 4.808459602656813e-05, - "loss": 0.9592, - "step": 13000 - }, - { - "epoch": 0.11501264166622464, - "grad_norm": 4.025946617126465, - "learning_rate": 4.808312263889626e-05, - "loss": 0.9441, - "step": 13010 - }, - { - "epoch": 0.11510104492653689, - "grad_norm": 2.533721685409546, - "learning_rate": 4.8081649251224386e-05, - "loss": 0.7379, - "step": 13020 - }, - { - "epoch": 0.11518944818684913, - "grad_norm": 13.652111053466797, - "learning_rate": 4.8080175863552515e-05, - "loss": 0.8379, - "step": 13030 - }, - { - "epoch": 0.11527785144716138, - "grad_norm": 5.801906585693359, - "learning_rate": 4.807870247588064e-05, - "loss": 0.8973, - "step": 13040 - }, - { - "epoch": 0.11536625470747361, - "grad_norm": 4.005748748779297, - "learning_rate": 4.807722908820878e-05, - "loss": 1.0268, - "step": 13050 - }, - { - "epoch": 0.11545465796778585, - "grad_norm": 19.381954193115234, - "learning_rate": 4.8075755700536907e-05, - "loss": 0.9041, - "step": 13060 - }, - { - "epoch": 0.1155430612280981, - "grad_norm": 4.4473981857299805, - "learning_rate": 4.8074282312865035e-05, - "loss": 0.8134, - "step": 13070 - }, - { - "epoch": 0.11563146448841033, - "grad_norm": 4.011990070343018, - "learning_rate": 4.807280892519316e-05, - "loss": 0.8182, - "step": 13080 - }, - { - "epoch": 0.11571986774872257, - "grad_norm": 6.486347198486328, - "learning_rate": 4.807133553752129e-05, - "loss": 0.8882, - "step": 13090 - }, - { - "epoch": 0.11580827100903482, - "grad_norm": 3.178736686706543, - "learning_rate": 4.806986214984942e-05, - "loss": 0.8041, - "step": 13100 - }, - { - "epoch": 0.11589667426934705, - "grad_norm": 4.373308181762695, - "learning_rate": 4.8068388762177555e-05, - "loss": 0.8849, - "step": 13110 - }, - { - "epoch": 0.1159850775296593, - "grad_norm": 5.344725608825684, - "learning_rate": 4.8066915374505684e-05, - "loss": 0.8081, - "step": 13120 - }, - { - "epoch": 0.11607348078997154, - "grad_norm": 5.3296356201171875, - "learning_rate": 4.806544198683381e-05, - "loss": 0.8636, - "step": 13130 - }, - { - "epoch": 0.11616188405028377, - "grad_norm": 4.61037015914917, - "learning_rate": 4.806396859916194e-05, - "loss": 0.7847, - "step": 13140 - }, - { - "epoch": 0.11625028731059601, - "grad_norm": 9.912908554077148, - "learning_rate": 4.806249521149007e-05, - "loss": 0.8597, - "step": 13150 - }, - { - "epoch": 0.11633869057090826, - "grad_norm": 4.560932159423828, - "learning_rate": 4.80610218238182e-05, - "loss": 0.8411, - "step": 13160 - }, - { - "epoch": 0.11642709383122049, - "grad_norm": 4.295501232147217, - "learning_rate": 4.805954843614633e-05, - "loss": 0.9372, - "step": 13170 - }, - { - "epoch": 0.11651549709153274, - "grad_norm": 6.329944133758545, - "learning_rate": 4.8058075048474454e-05, - "loss": 0.8476, - "step": 13180 - }, - { - "epoch": 0.11660390035184498, - "grad_norm": 2.4239487648010254, - "learning_rate": 4.805660166080259e-05, - "loss": 0.8588, - "step": 13190 - }, - { - "epoch": 0.11669230361215721, - "grad_norm": 3.171091318130493, - "learning_rate": 4.805512827313072e-05, - "loss": 0.8872, - "step": 13200 - }, - { - "epoch": 0.11678070687246946, - "grad_norm": 2.808485984802246, - "learning_rate": 4.8053654885458845e-05, - "loss": 0.7506, - "step": 13210 - }, - { - "epoch": 0.1168691101327817, - "grad_norm": 6.207040309906006, - "learning_rate": 4.8052181497786974e-05, - "loss": 0.9784, - "step": 13220 - }, - { - "epoch": 0.11695751339309393, - "grad_norm": 3.784930467605591, - "learning_rate": 4.805070811011511e-05, - "loss": 0.9089, - "step": 13230 - }, - { - "epoch": 0.11704591665340618, - "grad_norm": 4.8151044845581055, - "learning_rate": 4.804923472244323e-05, - "loss": 0.8305, - "step": 13240 - }, - { - "epoch": 0.11713431991371842, - "grad_norm": 4.724689483642578, - "learning_rate": 4.8047761334771366e-05, - "loss": 0.8698, - "step": 13250 - }, - { - "epoch": 0.11722272317403065, - "grad_norm": 6.355766773223877, - "learning_rate": 4.804628794709949e-05, - "loss": 0.8895, - "step": 13260 - }, - { - "epoch": 0.1173111264343429, - "grad_norm": 3.905327796936035, - "learning_rate": 4.804481455942762e-05, - "loss": 0.9173, - "step": 13270 - }, - { - "epoch": 0.11739952969465514, - "grad_norm": 6.199215412139893, - "learning_rate": 4.804334117175575e-05, - "loss": 0.9852, - "step": 13280 - }, - { - "epoch": 0.11748793295496737, - "grad_norm": 6.366835117340088, - "learning_rate": 4.804186778408388e-05, - "loss": 0.8567, - "step": 13290 - }, - { - "epoch": 0.11757633621527962, - "grad_norm": 4.903016567230225, - "learning_rate": 4.804039439641201e-05, - "loss": 0.8809, - "step": 13300 - }, - { - "epoch": 0.11766473947559186, - "grad_norm": 5.171665191650391, - "learning_rate": 4.803892100874014e-05, - "loss": 0.8251, - "step": 13310 - }, - { - "epoch": 0.1177531427359041, - "grad_norm": 10.476667404174805, - "learning_rate": 4.8037447621068264e-05, - "loss": 0.8059, - "step": 13320 - }, - { - "epoch": 0.11784154599621634, - "grad_norm": 3.8133482933044434, - "learning_rate": 4.80359742333964e-05, - "loss": 0.8106, - "step": 13330 - }, - { - "epoch": 0.11792994925652858, - "grad_norm": 8.48343563079834, - "learning_rate": 4.803450084572453e-05, - "loss": 0.9235, - "step": 13340 - }, - { - "epoch": 0.11801835251684083, - "grad_norm": 6.139083385467529, - "learning_rate": 4.8033027458052656e-05, - "loss": 0.7608, - "step": 13350 - }, - { - "epoch": 0.11810675577715306, - "grad_norm": 14.097145080566406, - "learning_rate": 4.8031554070380784e-05, - "loss": 0.841, - "step": 13360 - }, - { - "epoch": 0.1181951590374653, - "grad_norm": 7.063056468963623, - "learning_rate": 4.803008068270892e-05, - "loss": 0.8513, - "step": 13370 - }, - { - "epoch": 0.11828356229777755, - "grad_norm": 2.2021477222442627, - "learning_rate": 4.802860729503704e-05, - "loss": 0.9442, - "step": 13380 - }, - { - "epoch": 0.11837196555808978, - "grad_norm": 7.366992950439453, - "learning_rate": 4.8027133907365176e-05, - "loss": 0.9797, - "step": 13390 - }, - { - "epoch": 0.11846036881840202, - "grad_norm": 4.778909683227539, - "learning_rate": 4.80256605196933e-05, - "loss": 0.8654, - "step": 13400 - }, - { - "epoch": 0.11854877207871427, - "grad_norm": 6.330195426940918, - "learning_rate": 4.802418713202143e-05, - "loss": 0.912, - "step": 13410 - }, - { - "epoch": 0.1186371753390265, - "grad_norm": 3.0833261013031006, - "learning_rate": 4.802271374434956e-05, - "loss": 0.9457, - "step": 13420 - }, - { - "epoch": 0.11872557859933874, - "grad_norm": 2.9535045623779297, - "learning_rate": 4.802124035667769e-05, - "loss": 0.8395, - "step": 13430 - }, - { - "epoch": 0.11881398185965099, - "grad_norm": 3.640794038772583, - "learning_rate": 4.801976696900582e-05, - "loss": 0.9153, - "step": 13440 - }, - { - "epoch": 0.11890238511996322, - "grad_norm": 4.801519393920898, - "learning_rate": 4.801829358133395e-05, - "loss": 0.7588, - "step": 13450 - }, - { - "epoch": 0.11899078838027546, - "grad_norm": 6.261791706085205, - "learning_rate": 4.8016820193662075e-05, - "loss": 0.8941, - "step": 13460 - }, - { - "epoch": 0.11907919164058771, - "grad_norm": 5.783773422241211, - "learning_rate": 4.801534680599021e-05, - "loss": 0.8079, - "step": 13470 - }, - { - "epoch": 0.11916759490089994, - "grad_norm": 2.9788925647735596, - "learning_rate": 4.801387341831834e-05, - "loss": 0.8982, - "step": 13480 - }, - { - "epoch": 0.11925599816121218, - "grad_norm": 6.95315408706665, - "learning_rate": 4.8012400030646466e-05, - "loss": 0.9352, - "step": 13490 - }, - { - "epoch": 0.11934440142152443, - "grad_norm": 3.809793710708618, - "learning_rate": 4.8010926642974595e-05, - "loss": 0.9175, - "step": 13500 - }, - { - "epoch": 0.11943280468183666, - "grad_norm": 5.846870422363281, - "learning_rate": 4.800945325530272e-05, - "loss": 0.9252, - "step": 13510 - }, - { - "epoch": 0.1195212079421489, - "grad_norm": 2.3290843963623047, - "learning_rate": 4.800797986763085e-05, - "loss": 0.7772, - "step": 13520 - }, - { - "epoch": 0.11960961120246115, - "grad_norm": 3.1424500942230225, - "learning_rate": 4.800650647995899e-05, - "loss": 0.9232, - "step": 13530 - }, - { - "epoch": 0.11969801446277338, - "grad_norm": 5.945415019989014, - "learning_rate": 4.800503309228711e-05, - "loss": 0.8722, - "step": 13540 - }, - { - "epoch": 0.11978641772308563, - "grad_norm": 4.896644592285156, - "learning_rate": 4.800355970461524e-05, - "loss": 0.9078, - "step": 13550 - }, - { - "epoch": 0.11987482098339787, - "grad_norm": 3.6316447257995605, - "learning_rate": 4.800208631694337e-05, - "loss": 0.8587, - "step": 13560 - }, - { - "epoch": 0.11996322424371011, - "grad_norm": 5.4388017654418945, - "learning_rate": 4.80006129292715e-05, - "loss": 0.7887, - "step": 13570 - }, - { - "epoch": 0.12005162750402235, - "grad_norm": 5.254912376403809, - "learning_rate": 4.799913954159963e-05, - "loss": 0.8473, - "step": 13580 - }, - { - "epoch": 0.12014003076433459, - "grad_norm": 10.683406829833984, - "learning_rate": 4.7997666153927764e-05, - "loss": 0.7889, - "step": 13590 - }, - { - "epoch": 0.12022843402464684, - "grad_norm": 9.83441162109375, - "learning_rate": 4.7996192766255885e-05, - "loss": 0.7997, - "step": 13600 - }, - { - "epoch": 0.12031683728495907, - "grad_norm": 3.063049793243408, - "learning_rate": 4.799471937858402e-05, - "loss": 0.7573, - "step": 13610 - }, - { - "epoch": 0.12040524054527131, - "grad_norm": 6.09453010559082, - "learning_rate": 4.799324599091214e-05, - "loss": 0.7714, - "step": 13620 - }, - { - "epoch": 0.12049364380558356, - "grad_norm": 5.107629299163818, - "learning_rate": 4.799177260324028e-05, - "loss": 0.866, - "step": 13630 - }, - { - "epoch": 0.12058204706589579, - "grad_norm": 3.678056001663208, - "learning_rate": 4.7990299215568405e-05, - "loss": 0.8765, - "step": 13640 - }, - { - "epoch": 0.12067045032620803, - "grad_norm": 6.041098117828369, - "learning_rate": 4.7988825827896534e-05, - "loss": 0.8945, - "step": 13650 - }, - { - "epoch": 0.12075885358652028, - "grad_norm": 6.088251113891602, - "learning_rate": 4.798735244022466e-05, - "loss": 0.8592, - "step": 13660 - }, - { - "epoch": 0.12084725684683251, - "grad_norm": 6.8704423904418945, - "learning_rate": 4.79858790525528e-05, - "loss": 0.8, - "step": 13670 - }, - { - "epoch": 0.12093566010714475, - "grad_norm": 4.484743595123291, - "learning_rate": 4.798440566488092e-05, - "loss": 0.7832, - "step": 13680 - }, - { - "epoch": 0.121024063367457, - "grad_norm": 4.457681655883789, - "learning_rate": 4.7982932277209054e-05, - "loss": 0.8724, - "step": 13690 - }, - { - "epoch": 0.12111246662776923, - "grad_norm": 5.599424839019775, - "learning_rate": 4.798145888953718e-05, - "loss": 0.8824, - "step": 13700 - }, - { - "epoch": 0.12120086988808147, - "grad_norm": 3.0846285820007324, - "learning_rate": 4.797998550186531e-05, - "loss": 0.9783, - "step": 13710 - }, - { - "epoch": 0.12128927314839372, - "grad_norm": 3.4690675735473633, - "learning_rate": 4.797851211419344e-05, - "loss": 0.8959, - "step": 13720 - }, - { - "epoch": 0.12137767640870595, - "grad_norm": 4.735897064208984, - "learning_rate": 4.797703872652157e-05, - "loss": 0.8452, - "step": 13730 - }, - { - "epoch": 0.12146607966901819, - "grad_norm": 3.343179225921631, - "learning_rate": 4.7975565338849696e-05, - "loss": 0.8787, - "step": 13740 - }, - { - "epoch": 0.12155448292933044, - "grad_norm": 3.7673146724700928, - "learning_rate": 4.797409195117783e-05, - "loss": 1.0123, - "step": 13750 - }, - { - "epoch": 0.12164288618964267, - "grad_norm": 4.631723880767822, - "learning_rate": 4.797261856350595e-05, - "loss": 0.8697, - "step": 13760 - }, - { - "epoch": 0.12173128944995491, - "grad_norm": 13.146268844604492, - "learning_rate": 4.797114517583409e-05, - "loss": 0.7591, - "step": 13770 - }, - { - "epoch": 0.12181969271026716, - "grad_norm": 5.641947269439697, - "learning_rate": 4.7969671788162216e-05, - "loss": 0.8896, - "step": 13780 - }, - { - "epoch": 0.12190809597057939, - "grad_norm": 3.458003520965576, - "learning_rate": 4.7968198400490344e-05, - "loss": 0.825, - "step": 13790 - }, - { - "epoch": 0.12199649923089163, - "grad_norm": 5.369232654571533, - "learning_rate": 4.796672501281847e-05, - "loss": 0.9013, - "step": 13800 - }, - { - "epoch": 0.12208490249120388, - "grad_norm": 11.477063179016113, - "learning_rate": 4.796525162514661e-05, - "loss": 0.8588, - "step": 13810 - }, - { - "epoch": 0.12217330575151611, - "grad_norm": 3.9801762104034424, - "learning_rate": 4.796377823747473e-05, - "loss": 0.8606, - "step": 13820 - }, - { - "epoch": 0.12226170901182835, - "grad_norm": 6.173799991607666, - "learning_rate": 4.7962304849802864e-05, - "loss": 1.0348, - "step": 13830 - }, - { - "epoch": 0.1223501122721406, - "grad_norm": 3.98711895942688, - "learning_rate": 4.796083146213099e-05, - "loss": 0.858, - "step": 13840 - }, - { - "epoch": 0.12243851553245284, - "grad_norm": 9.050692558288574, - "learning_rate": 4.795935807445912e-05, - "loss": 0.9099, - "step": 13850 - }, - { - "epoch": 0.12252691879276507, - "grad_norm": 2.451476573944092, - "learning_rate": 4.795788468678725e-05, - "loss": 0.7765, - "step": 13860 - }, - { - "epoch": 0.12261532205307732, - "grad_norm": 4.387509346008301, - "learning_rate": 4.795641129911538e-05, - "loss": 0.8677, - "step": 13870 - }, - { - "epoch": 0.12270372531338956, - "grad_norm": 3.3893465995788574, - "learning_rate": 4.7954937911443506e-05, - "loss": 0.8658, - "step": 13880 - }, - { - "epoch": 0.1227921285737018, - "grad_norm": 6.2101898193359375, - "learning_rate": 4.795346452377164e-05, - "loss": 0.811, - "step": 13890 - }, - { - "epoch": 0.12288053183401404, - "grad_norm": 4.000254154205322, - "learning_rate": 4.795199113609977e-05, - "loss": 0.8461, - "step": 13900 - }, - { - "epoch": 0.12296893509432628, - "grad_norm": 4.265214920043945, - "learning_rate": 4.79505177484279e-05, - "loss": 0.9001, - "step": 13910 - }, - { - "epoch": 0.12305733835463852, - "grad_norm": 9.801916122436523, - "learning_rate": 4.7949044360756026e-05, - "loss": 0.9023, - "step": 13920 - }, - { - "epoch": 0.12314574161495076, - "grad_norm": 5.067219257354736, - "learning_rate": 4.7947570973084155e-05, - "loss": 0.7693, - "step": 13930 - }, - { - "epoch": 0.123234144875263, - "grad_norm": 5.618040084838867, - "learning_rate": 4.794609758541228e-05, - "loss": 0.9039, - "step": 13940 - }, - { - "epoch": 0.12332254813557524, - "grad_norm": 2.829528331756592, - "learning_rate": 4.794462419774042e-05, - "loss": 0.8986, - "step": 13950 - }, - { - "epoch": 0.12341095139588748, - "grad_norm": 6.598517894744873, - "learning_rate": 4.7943150810068547e-05, - "loss": 0.8015, - "step": 13960 - }, - { - "epoch": 0.12349935465619973, - "grad_norm": 8.449544906616211, - "learning_rate": 4.7941677422396675e-05, - "loss": 0.8606, - "step": 13970 - }, - { - "epoch": 0.12358775791651196, - "grad_norm": 3.056562662124634, - "learning_rate": 4.79402040347248e-05, - "loss": 0.8804, - "step": 13980 - }, - { - "epoch": 0.1236761611768242, - "grad_norm": 4.60211181640625, - "learning_rate": 4.793873064705293e-05, - "loss": 0.8114, - "step": 13990 - }, - { - "epoch": 0.12376456443713645, - "grad_norm": 11.740740776062012, - "learning_rate": 4.793725725938106e-05, - "loss": 0.835, - "step": 14000 - }, - { - "epoch": 0.12385296769744868, - "grad_norm": 4.6112775802612305, - "learning_rate": 4.793578387170919e-05, - "loss": 0.8879, - "step": 14010 - }, - { - "epoch": 0.12394137095776092, - "grad_norm": 10.848852157592773, - "learning_rate": 4.7934310484037323e-05, - "loss": 0.8923, - "step": 14020 - }, - { - "epoch": 0.12402977421807317, - "grad_norm": 3.880849599838257, - "learning_rate": 4.793283709636545e-05, - "loss": 0.8143, - "step": 14030 - }, - { - "epoch": 0.1241181774783854, - "grad_norm": 3.7946925163269043, - "learning_rate": 4.793136370869358e-05, - "loss": 0.893, - "step": 14040 - }, - { - "epoch": 0.12420658073869764, - "grad_norm": 3.187323808670044, - "learning_rate": 4.792989032102171e-05, - "loss": 0.9894, - "step": 14050 - }, - { - "epoch": 0.12429498399900989, - "grad_norm": 5.719008922576904, - "learning_rate": 4.792841693334984e-05, - "loss": 0.8328, - "step": 14060 - }, - { - "epoch": 0.12438338725932212, - "grad_norm": 1.9846103191375732, - "learning_rate": 4.7926943545677965e-05, - "loss": 0.6889, - "step": 14070 - }, - { - "epoch": 0.12447179051963436, - "grad_norm": 9.411588668823242, - "learning_rate": 4.79254701580061e-05, - "loss": 1.0346, - "step": 14080 - }, - { - "epoch": 0.1245601937799466, - "grad_norm": 4.604274272918701, - "learning_rate": 4.792399677033422e-05, - "loss": 0.8962, - "step": 14090 - }, - { - "epoch": 0.12464859704025885, - "grad_norm": 3.6185877323150635, - "learning_rate": 4.792252338266236e-05, - "loss": 0.9321, - "step": 14100 - }, - { - "epoch": 0.12473700030057108, - "grad_norm": 6.4194865226745605, - "learning_rate": 4.7921049994990485e-05, - "loss": 1.0308, - "step": 14110 - }, - { - "epoch": 0.12482540356088333, - "grad_norm": 7.311243534088135, - "learning_rate": 4.7919576607318614e-05, - "loss": 0.9134, - "step": 14120 - }, - { - "epoch": 0.12491380682119557, - "grad_norm": 2.457221508026123, - "learning_rate": 4.791810321964674e-05, - "loss": 0.8825, - "step": 14130 - }, - { - "epoch": 0.1250022100815078, - "grad_norm": 9.435111999511719, - "learning_rate": 4.791662983197488e-05, - "loss": 0.8359, - "step": 14140 - }, - { - "epoch": 0.12509061334182003, - "grad_norm": 4.99104642868042, - "learning_rate": 4.7915156444303e-05, - "loss": 0.9245, - "step": 14150 - }, - { - "epoch": 0.1251790166021323, - "grad_norm": 7.287257194519043, - "learning_rate": 4.7913683056631134e-05, - "loss": 0.8291, - "step": 14160 - }, - { - "epoch": 0.12526741986244452, - "grad_norm": 8.82583999633789, - "learning_rate": 4.791220966895926e-05, - "loss": 0.9718, - "step": 14170 - }, - { - "epoch": 0.12535582312275675, - "grad_norm": 2.8207483291625977, - "learning_rate": 4.791073628128739e-05, - "loss": 0.9119, - "step": 14180 - }, - { - "epoch": 0.125444226383069, - "grad_norm": 3.5591232776641846, - "learning_rate": 4.790926289361552e-05, - "loss": 0.8609, - "step": 14190 - }, - { - "epoch": 0.12553262964338124, - "grad_norm": 3.5088720321655273, - "learning_rate": 4.790778950594365e-05, - "loss": 0.8482, - "step": 14200 - }, - { - "epoch": 0.12562103290369347, - "grad_norm": 2.4947993755340576, - "learning_rate": 4.7906316118271776e-05, - "loss": 0.7897, - "step": 14210 - }, - { - "epoch": 0.12570943616400573, - "grad_norm": 3.704815149307251, - "learning_rate": 4.790484273059991e-05, - "loss": 0.8861, - "step": 14220 - }, - { - "epoch": 0.12579783942431796, - "grad_norm": 3.8462188243865967, - "learning_rate": 4.790336934292803e-05, - "loss": 0.7991, - "step": 14230 - }, - { - "epoch": 0.12588624268463022, - "grad_norm": 3.9030914306640625, - "learning_rate": 4.790189595525617e-05, - "loss": 0.8342, - "step": 14240 - }, - { - "epoch": 0.12597464594494245, - "grad_norm": 9.146306037902832, - "learning_rate": 4.7900422567584296e-05, - "loss": 1.004, - "step": 14250 - }, - { - "epoch": 0.12606304920525468, - "grad_norm": 4.142906188964844, - "learning_rate": 4.7898949179912424e-05, - "loss": 0.8683, - "step": 14260 - }, - { - "epoch": 0.12615145246556694, - "grad_norm": 4.316192626953125, - "learning_rate": 4.789747579224055e-05, - "loss": 0.794, - "step": 14270 - }, - { - "epoch": 0.12623985572587917, - "grad_norm": 2.6158251762390137, - "learning_rate": 4.789600240456869e-05, - "loss": 0.8485, - "step": 14280 - }, - { - "epoch": 0.1263282589861914, - "grad_norm": 6.301787853240967, - "learning_rate": 4.789452901689681e-05, - "loss": 0.8897, - "step": 14290 - }, - { - "epoch": 0.12641666224650366, - "grad_norm": 3.070772886276245, - "learning_rate": 4.7893055629224944e-05, - "loss": 0.8201, - "step": 14300 - }, - { - "epoch": 0.1265050655068159, - "grad_norm": 4.846888065338135, - "learning_rate": 4.789158224155307e-05, - "loss": 0.9017, - "step": 14310 - }, - { - "epoch": 0.12659346876712813, - "grad_norm": 6.518237590789795, - "learning_rate": 4.78901088538812e-05, - "loss": 0.9906, - "step": 14320 - }, - { - "epoch": 0.12668187202744038, - "grad_norm": 3.3887789249420166, - "learning_rate": 4.788863546620933e-05, - "loss": 0.8593, - "step": 14330 - }, - { - "epoch": 0.12677027528775262, - "grad_norm": 3.343748092651367, - "learning_rate": 4.788716207853746e-05, - "loss": 0.8383, - "step": 14340 - }, - { - "epoch": 0.12685867854806485, - "grad_norm": 2.786721706390381, - "learning_rate": 4.7885688690865586e-05, - "loss": 0.8111, - "step": 14350 - }, - { - "epoch": 0.1269470818083771, - "grad_norm": 3.8152003288269043, - "learning_rate": 4.788421530319372e-05, - "loss": 0.8315, - "step": 14360 - }, - { - "epoch": 0.12703548506868934, - "grad_norm": 3.993624687194824, - "learning_rate": 4.788274191552184e-05, - "loss": 0.88, - "step": 14370 - }, - { - "epoch": 0.12712388832900157, - "grad_norm": 4.20762825012207, - "learning_rate": 4.788126852784998e-05, - "loss": 0.8888, - "step": 14380 - }, - { - "epoch": 0.12721229158931383, - "grad_norm": 7.828494548797607, - "learning_rate": 4.7879795140178106e-05, - "loss": 0.8765, - "step": 14390 - }, - { - "epoch": 0.12730069484962606, - "grad_norm": 2.8013503551483154, - "learning_rate": 4.7878321752506235e-05, - "loss": 0.908, - "step": 14400 - }, - { - "epoch": 0.1273890981099383, - "grad_norm": 6.353537559509277, - "learning_rate": 4.787684836483436e-05, - "loss": 0.8286, - "step": 14410 - }, - { - "epoch": 0.12747750137025055, - "grad_norm": 4.11337423324585, - "learning_rate": 4.78753749771625e-05, - "loss": 0.9107, - "step": 14420 - }, - { - "epoch": 0.12756590463056278, - "grad_norm": 4.779507160186768, - "learning_rate": 4.787390158949062e-05, - "loss": 0.7006, - "step": 14430 - }, - { - "epoch": 0.127654307890875, - "grad_norm": 5.153433322906494, - "learning_rate": 4.7872428201818755e-05, - "loss": 0.8357, - "step": 14440 - }, - { - "epoch": 0.12774271115118727, - "grad_norm": 6.731582164764404, - "learning_rate": 4.7870954814146877e-05, - "loss": 0.867, - "step": 14450 - }, - { - "epoch": 0.1278311144114995, - "grad_norm": 2.1910324096679688, - "learning_rate": 4.786948142647501e-05, - "loss": 0.9936, - "step": 14460 - }, - { - "epoch": 0.12791951767181173, - "grad_norm": 4.246039390563965, - "learning_rate": 4.786800803880314e-05, - "loss": 0.8295, - "step": 14470 - }, - { - "epoch": 0.128007920932124, - "grad_norm": 3.4341440200805664, - "learning_rate": 4.786653465113127e-05, - "loss": 0.7022, - "step": 14480 - }, - { - "epoch": 0.12809632419243622, - "grad_norm": 3.687922716140747, - "learning_rate": 4.78650612634594e-05, - "loss": 0.962, - "step": 14490 - }, - { - "epoch": 0.12818472745274845, - "grad_norm": 5.527141094207764, - "learning_rate": 4.786358787578753e-05, - "loss": 0.8514, - "step": 14500 - }, - { - "epoch": 0.1282731307130607, - "grad_norm": 3.9552197456359863, - "learning_rate": 4.7862114488115653e-05, - "loss": 0.7996, - "step": 14510 - }, - { - "epoch": 0.12836153397337294, - "grad_norm": 4.820691108703613, - "learning_rate": 4.786064110044379e-05, - "loss": 0.86, - "step": 14520 - }, - { - "epoch": 0.12844993723368517, - "grad_norm": 4.065998077392578, - "learning_rate": 4.785916771277192e-05, - "loss": 0.799, - "step": 14530 - }, - { - "epoch": 0.12853834049399743, - "grad_norm": 17.629016876220703, - "learning_rate": 4.7857694325100045e-05, - "loss": 0.9446, - "step": 14540 - }, - { - "epoch": 0.12862674375430966, - "grad_norm": 7.211923599243164, - "learning_rate": 4.7856220937428174e-05, - "loss": 0.8793, - "step": 14550 - }, - { - "epoch": 0.1287151470146219, - "grad_norm": 1.7147059440612793, - "learning_rate": 4.78547475497563e-05, - "loss": 0.7855, - "step": 14560 - }, - { - "epoch": 0.12880355027493415, - "grad_norm": 5.725169658660889, - "learning_rate": 4.785327416208443e-05, - "loss": 0.8838, - "step": 14570 - }, - { - "epoch": 0.12889195353524638, - "grad_norm": 5.495853900909424, - "learning_rate": 4.7851800774412565e-05, - "loss": 0.9136, - "step": 14580 - }, - { - "epoch": 0.1289803567955586, - "grad_norm": 6.933687210083008, - "learning_rate": 4.785032738674069e-05, - "loss": 0.8623, - "step": 14590 - }, - { - "epoch": 0.12906876005587087, - "grad_norm": 6.265336513519287, - "learning_rate": 4.784885399906882e-05, - "loss": 0.7843, - "step": 14600 - }, - { - "epoch": 0.1291571633161831, - "grad_norm": 4.088986396789551, - "learning_rate": 4.784738061139695e-05, - "loss": 0.9676, - "step": 14610 - }, - { - "epoch": 0.12924556657649533, - "grad_norm": 3.6620850563049316, - "learning_rate": 4.784590722372508e-05, - "loss": 0.898, - "step": 14620 - }, - { - "epoch": 0.1293339698368076, - "grad_norm": 4.730831146240234, - "learning_rate": 4.784443383605321e-05, - "loss": 0.9408, - "step": 14630 - }, - { - "epoch": 0.12942237309711982, - "grad_norm": 3.718355417251587, - "learning_rate": 4.784296044838134e-05, - "loss": 0.898, - "step": 14640 - }, - { - "epoch": 0.12951077635743205, - "grad_norm": 5.269217491149902, - "learning_rate": 4.7841487060709464e-05, - "loss": 0.7567, - "step": 14650 - }, - { - "epoch": 0.1295991796177443, - "grad_norm": 5.338615417480469, - "learning_rate": 4.78400136730376e-05, - "loss": 0.8754, - "step": 14660 - }, - { - "epoch": 0.12968758287805654, - "grad_norm": 2.691253900527954, - "learning_rate": 4.783854028536572e-05, - "loss": 0.8197, - "step": 14670 - }, - { - "epoch": 0.12977598613836877, - "grad_norm": 4.899257183074951, - "learning_rate": 4.7837066897693856e-05, - "loss": 1.0636, - "step": 14680 - }, - { - "epoch": 0.12986438939868103, - "grad_norm": 7.4292168617248535, - "learning_rate": 4.7835593510021984e-05, - "loss": 0.8505, - "step": 14690 - }, - { - "epoch": 0.12995279265899326, - "grad_norm": 5.259484767913818, - "learning_rate": 4.783412012235011e-05, - "loss": 0.8914, - "step": 14700 - }, - { - "epoch": 0.1300411959193055, - "grad_norm": 2.593456506729126, - "learning_rate": 4.783264673467824e-05, - "loss": 0.8399, - "step": 14710 - }, - { - "epoch": 0.13012959917961775, - "grad_norm": 4.29045295715332, - "learning_rate": 4.7831173347006376e-05, - "loss": 0.8655, - "step": 14720 - }, - { - "epoch": 0.13021800243992998, - "grad_norm": 5.755084037780762, - "learning_rate": 4.78296999593345e-05, - "loss": 0.827, - "step": 14730 - }, - { - "epoch": 0.1303064057002422, - "grad_norm": 8.260836601257324, - "learning_rate": 4.782822657166263e-05, - "loss": 0.892, - "step": 14740 - }, - { - "epoch": 0.13039480896055447, - "grad_norm": 5.283960819244385, - "learning_rate": 4.782675318399076e-05, - "loss": 0.7857, - "step": 14750 - }, - { - "epoch": 0.1304832122208667, - "grad_norm": 3.982168197631836, - "learning_rate": 4.782527979631889e-05, - "loss": 0.7798, - "step": 14760 - }, - { - "epoch": 0.13057161548117893, - "grad_norm": 10.328381538391113, - "learning_rate": 4.782380640864702e-05, - "loss": 1.0108, - "step": 14770 - }, - { - "epoch": 0.1306600187414912, - "grad_norm": 4.827663898468018, - "learning_rate": 4.782233302097515e-05, - "loss": 0.7952, - "step": 14780 - }, - { - "epoch": 0.13074842200180342, - "grad_norm": 5.2243523597717285, - "learning_rate": 4.7820859633303274e-05, - "loss": 0.878, - "step": 14790 - }, - { - "epoch": 0.13083682526211568, - "grad_norm": 6.853215217590332, - "learning_rate": 4.781938624563141e-05, - "loss": 0.7715, - "step": 14800 - }, - { - "epoch": 0.1309252285224279, - "grad_norm": 8.641997337341309, - "learning_rate": 4.781791285795954e-05, - "loss": 0.9502, - "step": 14810 - }, - { - "epoch": 0.13101363178274014, - "grad_norm": 7.017167091369629, - "learning_rate": 4.7816439470287666e-05, - "loss": 0.7975, - "step": 14820 - }, - { - "epoch": 0.1311020350430524, - "grad_norm": 5.059592247009277, - "learning_rate": 4.7814966082615795e-05, - "loss": 0.8215, - "step": 14830 - }, - { - "epoch": 0.13119043830336463, - "grad_norm": 2.984628200531006, - "learning_rate": 4.781349269494392e-05, - "loss": 0.78, - "step": 14840 - }, - { - "epoch": 0.13127884156367686, - "grad_norm": 3.692122220993042, - "learning_rate": 4.781201930727205e-05, - "loss": 0.6725, - "step": 14850 - }, - { - "epoch": 0.13136724482398912, - "grad_norm": 10.22104263305664, - "learning_rate": 4.7810545919600187e-05, - "loss": 0.8766, - "step": 14860 - }, - { - "epoch": 0.13145564808430135, - "grad_norm": 5.891976833343506, - "learning_rate": 4.7809072531928315e-05, - "loss": 0.7219, - "step": 14870 - }, - { - "epoch": 0.13154405134461358, - "grad_norm": 11.42141056060791, - "learning_rate": 4.780759914425644e-05, - "loss": 0.8912, - "step": 14880 - }, - { - "epoch": 0.13163245460492584, - "grad_norm": 3.748466730117798, - "learning_rate": 4.780612575658457e-05, - "loss": 0.919, - "step": 14890 - }, - { - "epoch": 0.13172085786523807, - "grad_norm": 8.474726676940918, - "learning_rate": 4.78046523689127e-05, - "loss": 0.9573, - "step": 14900 - }, - { - "epoch": 0.1318092611255503, - "grad_norm": 9.216867446899414, - "learning_rate": 4.780317898124083e-05, - "loss": 0.7432, - "step": 14910 - }, - { - "epoch": 0.13189766438586256, - "grad_norm": 1.9129865169525146, - "learning_rate": 4.780170559356896e-05, - "loss": 1.0191, - "step": 14920 - }, - { - "epoch": 0.1319860676461748, - "grad_norm": 4.176100730895996, - "learning_rate": 4.780023220589709e-05, - "loss": 0.8914, - "step": 14930 - }, - { - "epoch": 0.13207447090648702, - "grad_norm": 6.394561290740967, - "learning_rate": 4.779875881822522e-05, - "loss": 0.8583, - "step": 14940 - }, - { - "epoch": 0.13216287416679928, - "grad_norm": 3.249783992767334, - "learning_rate": 4.779728543055335e-05, - "loss": 0.8655, - "step": 14950 - }, - { - "epoch": 0.1322512774271115, - "grad_norm": 9.301237106323242, - "learning_rate": 4.779581204288148e-05, - "loss": 0.815, - "step": 14960 - }, - { - "epoch": 0.13233968068742374, - "grad_norm": 2.9319992065429688, - "learning_rate": 4.7794338655209605e-05, - "loss": 0.8361, - "step": 14970 - }, - { - "epoch": 0.132428083947736, - "grad_norm": 4.976524829864502, - "learning_rate": 4.7792865267537734e-05, - "loss": 0.7031, - "step": 14980 - }, - { - "epoch": 0.13251648720804823, - "grad_norm": 5.306775093078613, - "learning_rate": 4.779139187986587e-05, - "loss": 0.8439, - "step": 14990 - }, - { - "epoch": 0.13260489046836046, - "grad_norm": 2.589348554611206, - "learning_rate": 4.7789918492194e-05, - "loss": 0.8209, - "step": 15000 - }, - { - "epoch": 0.13269329372867272, - "grad_norm": 5.116392135620117, - "learning_rate": 4.7788445104522125e-05, - "loss": 0.9382, - "step": 15010 - }, - { - "epoch": 0.13278169698898495, - "grad_norm": 4.212767601013184, - "learning_rate": 4.7786971716850254e-05, - "loss": 0.7489, - "step": 15020 - }, - { - "epoch": 0.13287010024929718, - "grad_norm": 3.410351514816284, - "learning_rate": 4.778549832917838e-05, - "loss": 0.8789, - "step": 15030 - }, - { - "epoch": 0.13295850350960944, - "grad_norm": 4.694933891296387, - "learning_rate": 4.778402494150651e-05, - "loss": 0.8327, - "step": 15040 - }, - { - "epoch": 0.13304690676992167, - "grad_norm": 1.849266529083252, - "learning_rate": 4.7782551553834646e-05, - "loss": 0.7674, - "step": 15050 - }, - { - "epoch": 0.1331353100302339, - "grad_norm": 4.732289791107178, - "learning_rate": 4.778107816616277e-05, - "loss": 0.761, - "step": 15060 - }, - { - "epoch": 0.13322371329054616, - "grad_norm": 5.243071556091309, - "learning_rate": 4.77796047784909e-05, - "loss": 0.8116, - "step": 15070 - }, - { - "epoch": 0.1333121165508584, - "grad_norm": 3.66876482963562, - "learning_rate": 4.777813139081903e-05, - "loss": 0.8861, - "step": 15080 - }, - { - "epoch": 0.13340051981117063, - "grad_norm": 5.465664386749268, - "learning_rate": 4.777665800314716e-05, - "loss": 0.8703, - "step": 15090 - }, - { - "epoch": 0.13348892307148288, - "grad_norm": 4.7874627113342285, - "learning_rate": 4.777518461547529e-05, - "loss": 0.7911, - "step": 15100 - }, - { - "epoch": 0.13357732633179512, - "grad_norm": 4.305376052856445, - "learning_rate": 4.777371122780342e-05, - "loss": 0.8853, - "step": 15110 - }, - { - "epoch": 0.13366572959210735, - "grad_norm": 4.703049659729004, - "learning_rate": 4.7772237840131544e-05, - "loss": 0.7474, - "step": 15120 - }, - { - "epoch": 0.1337541328524196, - "grad_norm": 4.865607261657715, - "learning_rate": 4.777076445245968e-05, - "loss": 0.8021, - "step": 15130 - }, - { - "epoch": 0.13384253611273184, - "grad_norm": 4.444889068603516, - "learning_rate": 4.77692910647878e-05, - "loss": 0.9501, - "step": 15140 - }, - { - "epoch": 0.13393093937304407, - "grad_norm": 5.616090297698975, - "learning_rate": 4.7767817677115936e-05, - "loss": 0.8098, - "step": 15150 - }, - { - "epoch": 0.13401934263335633, - "grad_norm": 8.412457466125488, - "learning_rate": 4.7766344289444064e-05, - "loss": 0.9875, - "step": 15160 - }, - { - "epoch": 0.13410774589366856, - "grad_norm": 2.9461898803710938, - "learning_rate": 4.776487090177219e-05, - "loss": 0.8954, - "step": 15170 - }, - { - "epoch": 0.1341961491539808, - "grad_norm": 4.904505729675293, - "learning_rate": 4.776339751410032e-05, - "loss": 0.7522, - "step": 15180 - }, - { - "epoch": 0.13428455241429305, - "grad_norm": 8.480589866638184, - "learning_rate": 4.7761924126428456e-05, - "loss": 0.7521, - "step": 15190 - }, - { - "epoch": 0.13437295567460528, - "grad_norm": 4.232245922088623, - "learning_rate": 4.776045073875658e-05, - "loss": 0.7521, - "step": 15200 - }, - { - "epoch": 0.1344613589349175, - "grad_norm": 12.435349464416504, - "learning_rate": 4.775897735108471e-05, - "loss": 0.8171, - "step": 15210 - }, - { - "epoch": 0.13454976219522977, - "grad_norm": 3.5983023643493652, - "learning_rate": 4.775750396341284e-05, - "loss": 0.812, - "step": 15220 - }, - { - "epoch": 0.134638165455542, - "grad_norm": 8.229449272155762, - "learning_rate": 4.775603057574097e-05, - "loss": 0.7887, - "step": 15230 - }, - { - "epoch": 0.13472656871585423, - "grad_norm": 13.410287857055664, - "learning_rate": 4.77545571880691e-05, - "loss": 0.8531, - "step": 15240 - }, - { - "epoch": 0.1348149719761665, - "grad_norm": 13.72982120513916, - "learning_rate": 4.775308380039723e-05, - "loss": 0.9009, - "step": 15250 - }, - { - "epoch": 0.13490337523647872, - "grad_norm": 4.8448028564453125, - "learning_rate": 4.7751610412725355e-05, - "loss": 0.8003, - "step": 15260 - }, - { - "epoch": 0.13499177849679095, - "grad_norm": 4.699975490570068, - "learning_rate": 4.775013702505349e-05, - "loss": 0.8005, - "step": 15270 - }, - { - "epoch": 0.1350801817571032, - "grad_norm": 3.7422728538513184, - "learning_rate": 4.774866363738161e-05, - "loss": 0.9568, - "step": 15280 - }, - { - "epoch": 0.13516858501741544, - "grad_norm": 5.041273593902588, - "learning_rate": 4.7747190249709746e-05, - "loss": 0.8933, - "step": 15290 - }, - { - "epoch": 0.13525698827772767, - "grad_norm": 4.4108428955078125, - "learning_rate": 4.7745716862037875e-05, - "loss": 0.8552, - "step": 15300 - }, - { - "epoch": 0.13534539153803993, - "grad_norm": 8.836749076843262, - "learning_rate": 4.7744243474366e-05, - "loss": 0.8533, - "step": 15310 - }, - { - "epoch": 0.13543379479835216, - "grad_norm": 3.6807878017425537, - "learning_rate": 4.774277008669413e-05, - "loss": 0.908, - "step": 15320 - }, - { - "epoch": 0.13552219805866442, - "grad_norm": 5.754607200622559, - "learning_rate": 4.7741296699022267e-05, - "loss": 0.721, - "step": 15330 - }, - { - "epoch": 0.13561060131897665, - "grad_norm": 5.532271862030029, - "learning_rate": 4.773982331135039e-05, - "loss": 1.0477, - "step": 15340 - }, - { - "epoch": 0.13569900457928888, - "grad_norm": 3.1188676357269287, - "learning_rate": 4.773834992367852e-05, - "loss": 0.8783, - "step": 15350 - }, - { - "epoch": 0.13578740783960114, - "grad_norm": 4.697937488555908, - "learning_rate": 4.773687653600665e-05, - "loss": 0.8323, - "step": 15360 - }, - { - "epoch": 0.13587581109991337, - "grad_norm": 12.706995964050293, - "learning_rate": 4.773540314833478e-05, - "loss": 0.801, - "step": 15370 - }, - { - "epoch": 0.1359642143602256, - "grad_norm": 6.7111735343933105, - "learning_rate": 4.773392976066291e-05, - "loss": 0.7436, - "step": 15380 - }, - { - "epoch": 0.13605261762053786, - "grad_norm": 4.693384170532227, - "learning_rate": 4.773245637299104e-05, - "loss": 0.7366, - "step": 15390 - }, - { - "epoch": 0.1361410208808501, - "grad_norm": 3.8970184326171875, - "learning_rate": 4.7730982985319165e-05, - "loss": 0.6114, - "step": 15400 - }, - { - "epoch": 0.13622942414116232, - "grad_norm": 7.473209857940674, - "learning_rate": 4.77295095976473e-05, - "loss": 0.9589, - "step": 15410 - }, - { - "epoch": 0.13631782740147458, - "grad_norm": 3.8958041667938232, - "learning_rate": 4.772803620997542e-05, - "loss": 0.9313, - "step": 15420 - }, - { - "epoch": 0.1364062306617868, - "grad_norm": 4.38140869140625, - "learning_rate": 4.772656282230356e-05, - "loss": 0.7831, - "step": 15430 - }, - { - "epoch": 0.13649463392209904, - "grad_norm": 3.4247639179229736, - "learning_rate": 4.7725089434631685e-05, - "loss": 0.8002, - "step": 15440 - }, - { - "epoch": 0.1365830371824113, - "grad_norm": 5.48562479019165, - "learning_rate": 4.7723616046959814e-05, - "loss": 0.7916, - "step": 15450 - }, - { - "epoch": 0.13667144044272353, - "grad_norm": 7.411536693572998, - "learning_rate": 4.772214265928794e-05, - "loss": 0.8044, - "step": 15460 - }, - { - "epoch": 0.13675984370303576, - "grad_norm": 2.3300602436065674, - "learning_rate": 4.772066927161608e-05, - "loss": 0.75, - "step": 15470 - }, - { - "epoch": 0.13684824696334802, - "grad_norm": 3.276644468307495, - "learning_rate": 4.77191958839442e-05, - "loss": 0.8477, - "step": 15480 - }, - { - "epoch": 0.13693665022366025, - "grad_norm": 4.852524280548096, - "learning_rate": 4.7717722496272334e-05, - "loss": 0.8006, - "step": 15490 - }, - { - "epoch": 0.13702505348397248, - "grad_norm": 10.70208740234375, - "learning_rate": 4.7716249108600455e-05, - "loss": 0.8151, - "step": 15500 - }, - { - "epoch": 0.13711345674428474, - "grad_norm": 3.9455597400665283, - "learning_rate": 4.771477572092859e-05, - "loss": 0.8346, - "step": 15510 - }, - { - "epoch": 0.13720186000459697, - "grad_norm": 5.669267177581787, - "learning_rate": 4.771330233325672e-05, - "loss": 0.8101, - "step": 15520 - }, - { - "epoch": 0.1372902632649092, - "grad_norm": 5.7352495193481445, - "learning_rate": 4.771182894558485e-05, - "loss": 0.8337, - "step": 15530 - }, - { - "epoch": 0.13737866652522146, - "grad_norm": 5.060227394104004, - "learning_rate": 4.7710355557912976e-05, - "loss": 0.7496, - "step": 15540 - }, - { - "epoch": 0.1374670697855337, - "grad_norm": 2.309218406677246, - "learning_rate": 4.770888217024111e-05, - "loss": 0.8622, - "step": 15550 - }, - { - "epoch": 0.13755547304584592, - "grad_norm": 5.97791051864624, - "learning_rate": 4.770740878256923e-05, - "loss": 0.7933, - "step": 15560 - }, - { - "epoch": 0.13764387630615818, - "grad_norm": 5.149936199188232, - "learning_rate": 4.770593539489737e-05, - "loss": 0.6902, - "step": 15570 - }, - { - "epoch": 0.1377322795664704, - "grad_norm": 4.803082466125488, - "learning_rate": 4.7704462007225496e-05, - "loss": 0.7019, - "step": 15580 - }, - { - "epoch": 0.13782068282678264, - "grad_norm": 8.148839950561523, - "learning_rate": 4.7702988619553624e-05, - "loss": 0.8295, - "step": 15590 - }, - { - "epoch": 0.1379090860870949, - "grad_norm": 4.663215637207031, - "learning_rate": 4.770151523188175e-05, - "loss": 0.9214, - "step": 15600 - }, - { - "epoch": 0.13799748934740713, - "grad_norm": 7.309902667999268, - "learning_rate": 4.770004184420988e-05, - "loss": 0.7368, - "step": 15610 - }, - { - "epoch": 0.13808589260771936, - "grad_norm": 3.3789870738983154, - "learning_rate": 4.769856845653801e-05, - "loss": 0.8317, - "step": 15620 - }, - { - "epoch": 0.13817429586803162, - "grad_norm": 6.748372554779053, - "learning_rate": 4.7697095068866144e-05, - "loss": 0.8868, - "step": 15630 - }, - { - "epoch": 0.13826269912834385, - "grad_norm": 12.395320892333984, - "learning_rate": 4.7695621681194266e-05, - "loss": 0.885, - "step": 15640 - }, - { - "epoch": 0.13835110238865608, - "grad_norm": 5.548238754272461, - "learning_rate": 4.76941482935224e-05, - "loss": 0.7629, - "step": 15650 - }, - { - "epoch": 0.13843950564896834, - "grad_norm": 2.1533687114715576, - "learning_rate": 4.769267490585053e-05, - "loss": 0.7936, - "step": 15660 - }, - { - "epoch": 0.13852790890928057, - "grad_norm": 3.9797375202178955, - "learning_rate": 4.769120151817866e-05, - "loss": 0.8491, - "step": 15670 - }, - { - "epoch": 0.1386163121695928, - "grad_norm": 2.603139638900757, - "learning_rate": 4.7689728130506786e-05, - "loss": 0.8466, - "step": 15680 - }, - { - "epoch": 0.13870471542990506, - "grad_norm": 2.4111125469207764, - "learning_rate": 4.768825474283492e-05, - "loss": 0.8236, - "step": 15690 - }, - { - "epoch": 0.1387931186902173, - "grad_norm": 5.43579626083374, - "learning_rate": 4.768678135516304e-05, - "loss": 0.8628, - "step": 15700 - }, - { - "epoch": 0.13888152195052952, - "grad_norm": 2.9611611366271973, - "learning_rate": 4.768530796749118e-05, - "loss": 0.7865, - "step": 15710 - }, - { - "epoch": 0.13896992521084178, - "grad_norm": 3.3550655841827393, - "learning_rate": 4.7683834579819306e-05, - "loss": 0.8111, - "step": 15720 - }, - { - "epoch": 0.139058328471154, - "grad_norm": 6.024102687835693, - "learning_rate": 4.7682361192147435e-05, - "loss": 0.8792, - "step": 15730 - }, - { - "epoch": 0.13914673173146624, - "grad_norm": 6.341953754425049, - "learning_rate": 4.768088780447556e-05, - "loss": 0.9089, - "step": 15740 - }, - { - "epoch": 0.1392351349917785, - "grad_norm": 2.9542484283447266, - "learning_rate": 4.767941441680369e-05, - "loss": 0.7912, - "step": 15750 - }, - { - "epoch": 0.13932353825209073, - "grad_norm": 12.003358840942383, - "learning_rate": 4.767794102913182e-05, - "loss": 0.8087, - "step": 15760 - }, - { - "epoch": 0.13941194151240296, - "grad_norm": 9.762426376342773, - "learning_rate": 4.7676467641459955e-05, - "loss": 0.7731, - "step": 15770 - }, - { - "epoch": 0.13950034477271522, - "grad_norm": 4.455225944519043, - "learning_rate": 4.767499425378808e-05, - "loss": 0.8065, - "step": 15780 - }, - { - "epoch": 0.13958874803302745, - "grad_norm": 5.925757884979248, - "learning_rate": 4.767352086611621e-05, - "loss": 0.9041, - "step": 15790 - }, - { - "epoch": 0.13967715129333969, - "grad_norm": 4.6304521560668945, - "learning_rate": 4.767204747844434e-05, - "loss": 0.8063, - "step": 15800 - }, - { - "epoch": 0.13976555455365194, - "grad_norm": 9.210540771484375, - "learning_rate": 4.767057409077247e-05, - "loss": 0.8222, - "step": 15810 - }, - { - "epoch": 0.13985395781396417, - "grad_norm": 2.7507073879241943, - "learning_rate": 4.7669100703100597e-05, - "loss": 0.8012, - "step": 15820 - }, - { - "epoch": 0.1399423610742764, - "grad_norm": 11.781067848205566, - "learning_rate": 4.766762731542873e-05, - "loss": 0.7386, - "step": 15830 - }, - { - "epoch": 0.14003076433458866, - "grad_norm": 4.750818729400635, - "learning_rate": 4.766615392775686e-05, - "loss": 0.9513, - "step": 15840 - }, - { - "epoch": 0.1401191675949009, - "grad_norm": 3.239306688308716, - "learning_rate": 4.766468054008499e-05, - "loss": 0.7889, - "step": 15850 - }, - { - "epoch": 0.14020757085521315, - "grad_norm": 5.480432510375977, - "learning_rate": 4.766320715241312e-05, - "loss": 0.7502, - "step": 15860 - }, - { - "epoch": 0.14029597411552538, - "grad_norm": 13.013794898986816, - "learning_rate": 4.7661733764741245e-05, - "loss": 0.9384, - "step": 15870 - }, - { - "epoch": 0.14038437737583762, - "grad_norm": 4.921560287475586, - "learning_rate": 4.7660260377069373e-05, - "loss": 0.8006, - "step": 15880 - }, - { - "epoch": 0.14047278063614987, - "grad_norm": 1.9659104347229004, - "learning_rate": 4.76587869893975e-05, - "loss": 0.8384, - "step": 15890 - }, - { - "epoch": 0.1405611838964621, - "grad_norm": 6.83479118347168, - "learning_rate": 4.765731360172564e-05, - "loss": 0.7581, - "step": 15900 - }, - { - "epoch": 0.14064958715677434, - "grad_norm": 5.402812480926514, - "learning_rate": 4.7655840214053765e-05, - "loss": 0.9052, - "step": 15910 - }, - { - "epoch": 0.1407379904170866, - "grad_norm": 6.77946138381958, - "learning_rate": 4.7654366826381894e-05, - "loss": 0.793, - "step": 15920 - }, - { - "epoch": 0.14082639367739883, - "grad_norm": 4.841235160827637, - "learning_rate": 4.765289343871002e-05, - "loss": 0.6792, - "step": 15930 - }, - { - "epoch": 0.14091479693771106, - "grad_norm": 6.926675796508789, - "learning_rate": 4.765142005103815e-05, - "loss": 0.8645, - "step": 15940 - }, - { - "epoch": 0.14100320019802332, - "grad_norm": 7.380031585693359, - "learning_rate": 4.764994666336628e-05, - "loss": 0.8126, - "step": 15950 - }, - { - "epoch": 0.14109160345833555, - "grad_norm": 7.4095988273620605, - "learning_rate": 4.7648473275694414e-05, - "loss": 0.7796, - "step": 15960 - }, - { - "epoch": 0.14118000671864778, - "grad_norm": 3.8512370586395264, - "learning_rate": 4.7646999888022535e-05, - "loss": 0.7745, - "step": 15970 - }, - { - "epoch": 0.14126840997896004, - "grad_norm": 2.0865118503570557, - "learning_rate": 4.764552650035067e-05, - "loss": 0.8013, - "step": 15980 - }, - { - "epoch": 0.14135681323927227, - "grad_norm": 2.9433555603027344, - "learning_rate": 4.76440531126788e-05, - "loss": 0.7495, - "step": 15990 - }, - { - "epoch": 0.1414452164995845, - "grad_norm": 4.146435260772705, - "learning_rate": 4.764257972500693e-05, - "loss": 0.8801, - "step": 16000 - }, - { - "epoch": 0.14153361975989676, - "grad_norm": 9.377398490905762, - "learning_rate": 4.7641106337335056e-05, - "loss": 0.8938, - "step": 16010 - }, - { - "epoch": 0.141622023020209, - "grad_norm": 7.253297805786133, - "learning_rate": 4.763963294966319e-05, - "loss": 0.8652, - "step": 16020 - }, - { - "epoch": 0.14171042628052122, - "grad_norm": 6.060379505157471, - "learning_rate": 4.763815956199131e-05, - "loss": 0.8512, - "step": 16030 - }, - { - "epoch": 0.14179882954083348, - "grad_norm": 2.855339527130127, - "learning_rate": 4.763668617431945e-05, - "loss": 0.8104, - "step": 16040 - }, - { - "epoch": 0.1418872328011457, - "grad_norm": 9.66022777557373, - "learning_rate": 4.7635212786647576e-05, - "loss": 0.9924, - "step": 16050 - }, - { - "epoch": 0.14197563606145794, - "grad_norm": 3.456629991531372, - "learning_rate": 4.7633739398975704e-05, - "loss": 0.8359, - "step": 16060 - }, - { - "epoch": 0.1420640393217702, - "grad_norm": 5.4765191078186035, - "learning_rate": 4.763226601130383e-05, - "loss": 0.7722, - "step": 16070 - }, - { - "epoch": 0.14215244258208243, - "grad_norm": 8.099782943725586, - "learning_rate": 4.763079262363197e-05, - "loss": 0.7993, - "step": 16080 - }, - { - "epoch": 0.14224084584239466, - "grad_norm": 6.92963171005249, - "learning_rate": 4.762931923596009e-05, - "loss": 0.8937, - "step": 16090 - }, - { - "epoch": 0.14232924910270692, - "grad_norm": 3.91198992729187, - "learning_rate": 4.7627845848288224e-05, - "loss": 0.8908, - "step": 16100 - }, - { - "epoch": 0.14241765236301915, - "grad_norm": 4.417505264282227, - "learning_rate": 4.7626372460616346e-05, - "loss": 0.7337, - "step": 16110 - }, - { - "epoch": 0.14250605562333138, - "grad_norm": 2.9531617164611816, - "learning_rate": 4.762489907294448e-05, - "loss": 0.7656, - "step": 16120 - }, - { - "epoch": 0.14259445888364364, - "grad_norm": 6.2340407371521, - "learning_rate": 4.762342568527261e-05, - "loss": 0.9849, - "step": 16130 - }, - { - "epoch": 0.14268286214395587, - "grad_norm": 2.0591773986816406, - "learning_rate": 4.762195229760074e-05, - "loss": 0.8713, - "step": 16140 - }, - { - "epoch": 0.1427712654042681, - "grad_norm": 3.8224780559539795, - "learning_rate": 4.7620478909928866e-05, - "loss": 0.8033, - "step": 16150 - }, - { - "epoch": 0.14285966866458036, - "grad_norm": 3.3133275508880615, - "learning_rate": 4.7619005522257e-05, - "loss": 0.9767, - "step": 16160 - }, - { - "epoch": 0.1429480719248926, - "grad_norm": 10.423565864562988, - "learning_rate": 4.761753213458512e-05, - "loss": 0.9264, - "step": 16170 - }, - { - "epoch": 0.14303647518520482, - "grad_norm": 6.098910331726074, - "learning_rate": 4.761605874691326e-05, - "loss": 0.7519, - "step": 16180 - }, - { - "epoch": 0.14312487844551708, - "grad_norm": 8.131715774536133, - "learning_rate": 4.7614585359241386e-05, - "loss": 0.9972, - "step": 16190 - }, - { - "epoch": 0.1432132817058293, - "grad_norm": 4.524472236633301, - "learning_rate": 4.7613111971569515e-05, - "loss": 0.8572, - "step": 16200 - }, - { - "epoch": 0.14330168496614154, - "grad_norm": 4.032301425933838, - "learning_rate": 4.761163858389764e-05, - "loss": 0.7664, - "step": 16210 - }, - { - "epoch": 0.1433900882264538, - "grad_norm": 4.198031425476074, - "learning_rate": 4.761016519622577e-05, - "loss": 0.9005, - "step": 16220 - }, - { - "epoch": 0.14347849148676603, - "grad_norm": 5.050949573516846, - "learning_rate": 4.76086918085539e-05, - "loss": 0.7792, - "step": 16230 - }, - { - "epoch": 0.14356689474707826, - "grad_norm": 4.83923864364624, - "learning_rate": 4.7607218420882035e-05, - "loss": 0.8835, - "step": 16240 - }, - { - "epoch": 0.14365529800739052, - "grad_norm": 4.71106481552124, - "learning_rate": 4.7605745033210156e-05, - "loss": 0.8189, - "step": 16250 - }, - { - "epoch": 0.14374370126770275, - "grad_norm": 2.0753655433654785, - "learning_rate": 4.760427164553829e-05, - "loss": 0.8018, - "step": 16260 - }, - { - "epoch": 0.14383210452801498, - "grad_norm": 7.493185520172119, - "learning_rate": 4.760279825786642e-05, - "loss": 0.7613, - "step": 16270 - }, - { - "epoch": 0.14392050778832724, - "grad_norm": 11.220280647277832, - "learning_rate": 4.760132487019455e-05, - "loss": 0.8538, - "step": 16280 - }, - { - "epoch": 0.14400891104863947, - "grad_norm": 4.551141262054443, - "learning_rate": 4.759985148252268e-05, - "loss": 0.8649, - "step": 16290 - }, - { - "epoch": 0.1440973143089517, - "grad_norm": 2.6454129219055176, - "learning_rate": 4.759837809485081e-05, - "loss": 0.7681, - "step": 16300 - }, - { - "epoch": 0.14418571756926396, - "grad_norm": 6.077807903289795, - "learning_rate": 4.759690470717893e-05, - "loss": 0.8123, - "step": 16310 - }, - { - "epoch": 0.1442741208295762, - "grad_norm": 13.279069900512695, - "learning_rate": 4.759543131950707e-05, - "loss": 0.7205, - "step": 16320 - }, - { - "epoch": 0.14436252408988842, - "grad_norm": 5.215303421020508, - "learning_rate": 4.759395793183519e-05, - "loss": 0.9053, - "step": 16330 - }, - { - "epoch": 0.14445092735020068, - "grad_norm": 3.764324188232422, - "learning_rate": 4.7592484544163325e-05, - "loss": 0.7493, - "step": 16340 - }, - { - "epoch": 0.1445393306105129, - "grad_norm": 9.877325057983398, - "learning_rate": 4.7591011156491454e-05, - "loss": 0.7996, - "step": 16350 - }, - { - "epoch": 0.14462773387082514, - "grad_norm": 7.554166316986084, - "learning_rate": 4.758953776881958e-05, - "loss": 0.8533, - "step": 16360 - }, - { - "epoch": 0.1447161371311374, - "grad_norm": 2.535881996154785, - "learning_rate": 4.758806438114771e-05, - "loss": 0.7532, - "step": 16370 - }, - { - "epoch": 0.14480454039144963, - "grad_norm": 3.8387203216552734, - "learning_rate": 4.7586590993475845e-05, - "loss": 0.8488, - "step": 16380 - }, - { - "epoch": 0.1448929436517619, - "grad_norm": 4.006622314453125, - "learning_rate": 4.758511760580397e-05, - "loss": 0.9753, - "step": 16390 - }, - { - "epoch": 0.14498134691207412, - "grad_norm": 3.8722243309020996, - "learning_rate": 4.75836442181321e-05, - "loss": 0.8216, - "step": 16400 - }, - { - "epoch": 0.14506975017238635, - "grad_norm": 4.150247573852539, - "learning_rate": 4.758217083046023e-05, - "loss": 0.9156, - "step": 16410 - }, - { - "epoch": 0.1451581534326986, - "grad_norm": 2.6241047382354736, - "learning_rate": 4.758069744278836e-05, - "loss": 0.8719, - "step": 16420 - }, - { - "epoch": 0.14524655669301084, - "grad_norm": 5.52913761138916, - "learning_rate": 4.757922405511649e-05, - "loss": 0.7967, - "step": 16430 - }, - { - "epoch": 0.14533495995332307, - "grad_norm": 2.647954225540161, - "learning_rate": 4.7577750667444616e-05, - "loss": 0.7305, - "step": 16440 - }, - { - "epoch": 0.14542336321363533, - "grad_norm": 10.302239418029785, - "learning_rate": 4.7576277279772744e-05, - "loss": 0.8786, - "step": 16450 - }, - { - "epoch": 0.14551176647394756, - "grad_norm": 6.946014404296875, - "learning_rate": 4.757480389210088e-05, - "loss": 0.7101, - "step": 16460 - }, - { - "epoch": 0.1456001697342598, - "grad_norm": 7.227524757385254, - "learning_rate": 4.7573330504429e-05, - "loss": 0.7306, - "step": 16470 - }, - { - "epoch": 0.14568857299457205, - "grad_norm": 1.9578170776367188, - "learning_rate": 4.7571857116757136e-05, - "loss": 0.7232, - "step": 16480 - }, - { - "epoch": 0.14577697625488428, - "grad_norm": 4.118729114532471, - "learning_rate": 4.7570383729085264e-05, - "loss": 0.8288, - "step": 16490 - }, - { - "epoch": 0.1458653795151965, - "grad_norm": 9.074803352355957, - "learning_rate": 4.756891034141339e-05, - "loss": 0.9186, - "step": 16500 - }, - { - "epoch": 0.14595378277550877, - "grad_norm": 2.3599421977996826, - "learning_rate": 4.756743695374152e-05, - "loss": 0.7595, - "step": 16510 - }, - { - "epoch": 0.146042186035821, - "grad_norm": 5.108723163604736, - "learning_rate": 4.7565963566069656e-05, - "loss": 0.776, - "step": 16520 - }, - { - "epoch": 0.14613058929613323, - "grad_norm": 17.747661590576172, - "learning_rate": 4.756449017839778e-05, - "loss": 0.7488, - "step": 16530 - }, - { - "epoch": 0.1462189925564455, - "grad_norm": 2.2069766521453857, - "learning_rate": 4.756301679072591e-05, - "loss": 0.7622, - "step": 16540 - }, - { - "epoch": 0.14630739581675772, - "grad_norm": 2.8158762454986572, - "learning_rate": 4.756154340305404e-05, - "loss": 0.8401, - "step": 16550 - }, - { - "epoch": 0.14639579907706995, - "grad_norm": 6.137599945068359, - "learning_rate": 4.756007001538217e-05, - "loss": 0.8381, - "step": 16560 - }, - { - "epoch": 0.1464842023373822, - "grad_norm": 7.609254360198975, - "learning_rate": 4.75585966277103e-05, - "loss": 0.837, - "step": 16570 - }, - { - "epoch": 0.14657260559769444, - "grad_norm": 2.9540882110595703, - "learning_rate": 4.7557123240038426e-05, - "loss": 0.9364, - "step": 16580 - }, - { - "epoch": 0.14666100885800668, - "grad_norm": 2.969904899597168, - "learning_rate": 4.7555649852366554e-05, - "loss": 0.8806, - "step": 16590 - }, - { - "epoch": 0.14674941211831893, - "grad_norm": 3.8954246044158936, - "learning_rate": 4.755417646469469e-05, - "loss": 0.9661, - "step": 16600 - }, - { - "epoch": 0.14683781537863116, - "grad_norm": 3.806460380554199, - "learning_rate": 4.755270307702281e-05, - "loss": 0.8994, - "step": 16610 - }, - { - "epoch": 0.1469262186389434, - "grad_norm": 3.983349323272705, - "learning_rate": 4.7551229689350946e-05, - "loss": 0.9444, - "step": 16620 - }, - { - "epoch": 0.14701462189925565, - "grad_norm": 3.6446897983551025, - "learning_rate": 4.7549756301679075e-05, - "loss": 0.7827, - "step": 16630 - }, - { - "epoch": 0.14710302515956789, - "grad_norm": 4.506073951721191, - "learning_rate": 4.75482829140072e-05, - "loss": 0.7976, - "step": 16640 - }, - { - "epoch": 0.14719142841988012, - "grad_norm": 5.558804512023926, - "learning_rate": 4.754680952633533e-05, - "loss": 0.8037, - "step": 16650 - }, - { - "epoch": 0.14727983168019237, - "grad_norm": 5.593502998352051, - "learning_rate": 4.7545336138663466e-05, - "loss": 0.7896, - "step": 16660 - }, - { - "epoch": 0.1473682349405046, - "grad_norm": 2.9031991958618164, - "learning_rate": 4.754386275099159e-05, - "loss": 0.8126, - "step": 16670 - }, - { - "epoch": 0.14745663820081684, - "grad_norm": 2.8378851413726807, - "learning_rate": 4.754238936331972e-05, - "loss": 0.7547, - "step": 16680 - }, - { - "epoch": 0.1475450414611291, - "grad_norm": 9.635090827941895, - "learning_rate": 4.754091597564785e-05, - "loss": 0.962, - "step": 16690 - }, - { - "epoch": 0.14763344472144133, - "grad_norm": 6.492239475250244, - "learning_rate": 4.753944258797598e-05, - "loss": 0.8047, - "step": 16700 - }, - { - "epoch": 0.14772184798175356, - "grad_norm": 6.705989360809326, - "learning_rate": 4.753796920030411e-05, - "loss": 0.9173, - "step": 16710 - }, - { - "epoch": 0.14781025124206582, - "grad_norm": 6.1324992179870605, - "learning_rate": 4.7536495812632237e-05, - "loss": 0.8281, - "step": 16720 - }, - { - "epoch": 0.14789865450237805, - "grad_norm": 3.7849693298339844, - "learning_rate": 4.7535022424960365e-05, - "loss": 0.7801, - "step": 16730 - }, - { - "epoch": 0.14798705776269028, - "grad_norm": 2.7035951614379883, - "learning_rate": 4.75335490372885e-05, - "loss": 0.8403, - "step": 16740 - }, - { - "epoch": 0.14807546102300254, - "grad_norm": 2.984168529510498, - "learning_rate": 4.753207564961663e-05, - "loss": 0.8745, - "step": 16750 - }, - { - "epoch": 0.14816386428331477, - "grad_norm": 9.56676959991455, - "learning_rate": 4.753060226194476e-05, - "loss": 0.8441, - "step": 16760 - }, - { - "epoch": 0.148252267543627, - "grad_norm": 3.6529154777526855, - "learning_rate": 4.7529128874272885e-05, - "loss": 0.7925, - "step": 16770 - }, - { - "epoch": 0.14834067080393926, - "grad_norm": 4.137853622436523, - "learning_rate": 4.7527655486601013e-05, - "loss": 0.7155, - "step": 16780 - }, - { - "epoch": 0.1484290740642515, - "grad_norm": 3.5439553260803223, - "learning_rate": 4.752618209892914e-05, - "loss": 0.8398, - "step": 16790 - }, - { - "epoch": 0.14851747732456372, - "grad_norm": 7.92951774597168, - "learning_rate": 4.752470871125727e-05, - "loss": 0.94, - "step": 16800 - }, - { - "epoch": 0.14860588058487598, - "grad_norm": 6.424148082733154, - "learning_rate": 4.7523235323585405e-05, - "loss": 0.8397, - "step": 16810 - }, - { - "epoch": 0.1486942838451882, - "grad_norm": 2.9883008003234863, - "learning_rate": 4.7521761935913534e-05, - "loss": 0.7806, - "step": 16820 - }, - { - "epoch": 0.14878268710550044, - "grad_norm": 3.802116870880127, - "learning_rate": 4.752028854824166e-05, - "loss": 0.8431, - "step": 16830 - }, - { - "epoch": 0.1488710903658127, - "grad_norm": 3.7867634296417236, - "learning_rate": 4.751881516056979e-05, - "loss": 0.7787, - "step": 16840 - }, - { - "epoch": 0.14895949362612493, - "grad_norm": 5.5591840744018555, - "learning_rate": 4.751734177289792e-05, - "loss": 0.885, - "step": 16850 - }, - { - "epoch": 0.14904789688643716, - "grad_norm": 6.289994239807129, - "learning_rate": 4.751586838522605e-05, - "loss": 0.7527, - "step": 16860 - }, - { - "epoch": 0.14913630014674942, - "grad_norm": 2.677204132080078, - "learning_rate": 4.751439499755418e-05, - "loss": 0.8225, - "step": 16870 - }, - { - "epoch": 0.14922470340706165, - "grad_norm": 2.8042759895324707, - "learning_rate": 4.751292160988231e-05, - "loss": 0.814, - "step": 16880 - }, - { - "epoch": 0.14931310666737388, - "grad_norm": 3.9682416915893555, - "learning_rate": 4.751144822221044e-05, - "loss": 0.9185, - "step": 16890 - }, - { - "epoch": 0.14940150992768614, - "grad_norm": 6.54267692565918, - "learning_rate": 4.750997483453857e-05, - "loss": 0.8052, - "step": 16900 - }, - { - "epoch": 0.14948991318799837, - "grad_norm": 5.714585304260254, - "learning_rate": 4.7508501446866696e-05, - "loss": 0.7981, - "step": 16910 - }, - { - "epoch": 0.14957831644831063, - "grad_norm": 5.427567958831787, - "learning_rate": 4.7507028059194824e-05, - "loss": 0.8729, - "step": 16920 - }, - { - "epoch": 0.14966671970862286, - "grad_norm": 6.700192928314209, - "learning_rate": 4.750555467152296e-05, - "loss": 0.8984, - "step": 16930 - }, - { - "epoch": 0.1497551229689351, - "grad_norm": 6.20875358581543, - "learning_rate": 4.750408128385108e-05, - "loss": 0.8019, - "step": 16940 - }, - { - "epoch": 0.14984352622924735, - "grad_norm": 6.183334827423096, - "learning_rate": 4.7502607896179216e-05, - "loss": 0.8384, - "step": 16950 - }, - { - "epoch": 0.14993192948955958, - "grad_norm": 6.7605156898498535, - "learning_rate": 4.7501134508507344e-05, - "loss": 0.9076, - "step": 16960 - }, - { - "epoch": 0.1500203327498718, - "grad_norm": 10.34814167022705, - "learning_rate": 4.749966112083547e-05, - "loss": 0.7882, - "step": 16970 - }, - { - "epoch": 0.15010873601018407, - "grad_norm": 4.3602190017700195, - "learning_rate": 4.74981877331636e-05, - "loss": 0.8297, - "step": 16980 - }, - { - "epoch": 0.1501971392704963, - "grad_norm": 12.673126220703125, - "learning_rate": 4.7496714345491736e-05, - "loss": 0.8272, - "step": 16990 - }, - { - "epoch": 0.15028554253080853, - "grad_norm": 2.8232502937316895, - "learning_rate": 4.749524095781986e-05, - "loss": 0.7984, - "step": 17000 - }, - { - "epoch": 0.1503739457911208, - "grad_norm": 4.268876075744629, - "learning_rate": 4.749376757014799e-05, - "loss": 0.8776, - "step": 17010 - }, - { - "epoch": 0.15046234905143302, - "grad_norm": 3.9623379707336426, - "learning_rate": 4.749229418247612e-05, - "loss": 0.8895, - "step": 17020 - }, - { - "epoch": 0.15055075231174525, - "grad_norm": 5.467015743255615, - "learning_rate": 4.749082079480425e-05, - "loss": 0.8376, - "step": 17030 - }, - { - "epoch": 0.1506391555720575, - "grad_norm": 3.534050941467285, - "learning_rate": 4.748934740713238e-05, - "loss": 0.8534, - "step": 17040 - }, - { - "epoch": 0.15072755883236974, - "grad_norm": 11.9103422164917, - "learning_rate": 4.7487874019460506e-05, - "loss": 0.7452, - "step": 17050 - }, - { - "epoch": 0.15081596209268197, - "grad_norm": 7.80486536026001, - "learning_rate": 4.7486400631788634e-05, - "loss": 0.8906, - "step": 17060 - }, - { - "epoch": 0.15090436535299423, - "grad_norm": 6.0464887619018555, - "learning_rate": 4.748492724411677e-05, - "loss": 0.9138, - "step": 17070 - }, - { - "epoch": 0.15099276861330646, - "grad_norm": 4.2649030685424805, - "learning_rate": 4.748345385644489e-05, - "loss": 0.8548, - "step": 17080 - }, - { - "epoch": 0.1510811718736187, - "grad_norm": 3.9609460830688477, - "learning_rate": 4.7481980468773026e-05, - "loss": 0.8733, - "step": 17090 - }, - { - "epoch": 0.15116957513393095, - "grad_norm": 3.602222204208374, - "learning_rate": 4.7480507081101155e-05, - "loss": 0.8242, - "step": 17100 - }, - { - "epoch": 0.15125797839424318, - "grad_norm": 2.77740478515625, - "learning_rate": 4.747903369342928e-05, - "loss": 0.7811, - "step": 17110 - }, - { - "epoch": 0.1513463816545554, - "grad_norm": 10.50583267211914, - "learning_rate": 4.747756030575741e-05, - "loss": 0.8794, - "step": 17120 - }, - { - "epoch": 0.15143478491486767, - "grad_norm": 7.266853332519531, - "learning_rate": 4.7476086918085546e-05, - "loss": 0.8185, - "step": 17130 - }, - { - "epoch": 0.1515231881751799, - "grad_norm": 6.573174953460693, - "learning_rate": 4.747461353041367e-05, - "loss": 0.8115, - "step": 17140 - }, - { - "epoch": 0.15161159143549213, - "grad_norm": 4.139992713928223, - "learning_rate": 4.74731401427418e-05, - "loss": 0.7369, - "step": 17150 - }, - { - "epoch": 0.1516999946958044, - "grad_norm": 6.643852710723877, - "learning_rate": 4.7471666755069925e-05, - "loss": 0.9176, - "step": 17160 - }, - { - "epoch": 0.15178839795611662, - "grad_norm": 3.8509538173675537, - "learning_rate": 4.747019336739806e-05, - "loss": 0.8128, - "step": 17170 - }, - { - "epoch": 0.15187680121642885, - "grad_norm": 4.570491313934326, - "learning_rate": 4.746871997972619e-05, - "loss": 0.873, - "step": 17180 - }, - { - "epoch": 0.1519652044767411, - "grad_norm": 3.9173784255981445, - "learning_rate": 4.7467246592054317e-05, - "loss": 0.8889, - "step": 17190 - }, - { - "epoch": 0.15205360773705334, - "grad_norm": 6.56118631362915, - "learning_rate": 4.7465773204382445e-05, - "loss": 0.7714, - "step": 17200 - }, - { - "epoch": 0.15214201099736557, - "grad_norm": 7.693734169006348, - "learning_rate": 4.746429981671058e-05, - "loss": 0.8005, - "step": 17210 - }, - { - "epoch": 0.15223041425767783, - "grad_norm": 9.865682601928711, - "learning_rate": 4.74628264290387e-05, - "loss": 0.8259, - "step": 17220 - }, - { - "epoch": 0.15231881751799006, - "grad_norm": 4.894525051116943, - "learning_rate": 4.746135304136684e-05, - "loss": 0.9204, - "step": 17230 - }, - { - "epoch": 0.1524072207783023, - "grad_norm": 4.633498191833496, - "learning_rate": 4.7459879653694965e-05, - "loss": 0.8317, - "step": 17240 - }, - { - "epoch": 0.15249562403861455, - "grad_norm": 2.514660120010376, - "learning_rate": 4.7458406266023093e-05, - "loss": 0.8179, - "step": 17250 - }, - { - "epoch": 0.15258402729892678, - "grad_norm": 3.449573040008545, - "learning_rate": 4.745693287835122e-05, - "loss": 0.739, - "step": 17260 - }, - { - "epoch": 0.15267243055923901, - "grad_norm": 7.599994659423828, - "learning_rate": 4.745545949067935e-05, - "loss": 0.7893, - "step": 17270 - }, - { - "epoch": 0.15276083381955127, - "grad_norm": 5.116689205169678, - "learning_rate": 4.745398610300748e-05, - "loss": 0.8111, - "step": 17280 - }, - { - "epoch": 0.1528492370798635, - "grad_norm": 3.194286346435547, - "learning_rate": 4.7452512715335614e-05, - "loss": 0.8104, - "step": 17290 - }, - { - "epoch": 0.15293764034017573, - "grad_norm": 6.917521953582764, - "learning_rate": 4.7451039327663735e-05, - "loss": 0.9206, - "step": 17300 - }, - { - "epoch": 0.153026043600488, - "grad_norm": 4.247321605682373, - "learning_rate": 4.744956593999187e-05, - "loss": 0.7929, - "step": 17310 - }, - { - "epoch": 0.15311444686080022, - "grad_norm": 3.5399179458618164, - "learning_rate": 4.744809255232e-05, - "loss": 0.8369, - "step": 17320 - }, - { - "epoch": 0.15320285012111245, - "grad_norm": 8.189291000366211, - "learning_rate": 4.744661916464813e-05, - "loss": 0.7917, - "step": 17330 - }, - { - "epoch": 0.1532912533814247, - "grad_norm": 4.0611467361450195, - "learning_rate": 4.7445145776976255e-05, - "loss": 0.8189, - "step": 17340 - }, - { - "epoch": 0.15337965664173694, - "grad_norm": 8.815164566040039, - "learning_rate": 4.744367238930439e-05, - "loss": 0.7376, - "step": 17350 - }, - { - "epoch": 0.15346805990204918, - "grad_norm": 6.515460968017578, - "learning_rate": 4.744219900163251e-05, - "loss": 0.7956, - "step": 17360 - }, - { - "epoch": 0.15355646316236143, - "grad_norm": 7.194701194763184, - "learning_rate": 4.744072561396065e-05, - "loss": 0.8334, - "step": 17370 - }, - { - "epoch": 0.15364486642267366, - "grad_norm": 10.56981086730957, - "learning_rate": 4.743925222628877e-05, - "loss": 0.898, - "step": 17380 - }, - { - "epoch": 0.1537332696829859, - "grad_norm": 6.981093406677246, - "learning_rate": 4.7437778838616904e-05, - "loss": 0.8355, - "step": 17390 - }, - { - "epoch": 0.15382167294329815, - "grad_norm": 7.125991344451904, - "learning_rate": 4.743630545094503e-05, - "loss": 0.8454, - "step": 17400 - }, - { - "epoch": 0.15391007620361039, - "grad_norm": 6.938432216644287, - "learning_rate": 4.743483206327316e-05, - "loss": 0.8408, - "step": 17410 - }, - { - "epoch": 0.15399847946392262, - "grad_norm": 8.310367584228516, - "learning_rate": 4.743335867560129e-05, - "loss": 0.7057, - "step": 17420 - }, - { - "epoch": 0.15408688272423487, - "grad_norm": 5.260408878326416, - "learning_rate": 4.7431885287929424e-05, - "loss": 0.7177, - "step": 17430 - }, - { - "epoch": 0.1541752859845471, - "grad_norm": 5.00064754486084, - "learning_rate": 4.7430411900257546e-05, - "loss": 0.8111, - "step": 17440 - }, - { - "epoch": 0.15426368924485934, - "grad_norm": 4.080453395843506, - "learning_rate": 4.742893851258568e-05, - "loss": 0.9186, - "step": 17450 - }, - { - "epoch": 0.1543520925051716, - "grad_norm": 7.736790180206299, - "learning_rate": 4.742746512491381e-05, - "loss": 0.9419, - "step": 17460 - }, - { - "epoch": 0.15444049576548383, - "grad_norm": 5.28859281539917, - "learning_rate": 4.742599173724194e-05, - "loss": 0.876, - "step": 17470 - }, - { - "epoch": 0.15452889902579608, - "grad_norm": 1.9699537754058838, - "learning_rate": 4.7424518349570066e-05, - "loss": 0.7367, - "step": 17480 - }, - { - "epoch": 0.15461730228610832, - "grad_norm": 4.309940338134766, - "learning_rate": 4.74230449618982e-05, - "loss": 0.8329, - "step": 17490 - }, - { - "epoch": 0.15470570554642055, - "grad_norm": 6.036145210266113, - "learning_rate": 4.742157157422632e-05, - "loss": 0.8226, - "step": 17500 - }, - { - "epoch": 0.1547941088067328, - "grad_norm": 4.646291255950928, - "learning_rate": 4.742009818655446e-05, - "loss": 0.7517, - "step": 17510 - }, - { - "epoch": 0.15488251206704504, - "grad_norm": 2.6001710891723633, - "learning_rate": 4.741862479888258e-05, - "loss": 0.8583, - "step": 17520 - }, - { - "epoch": 0.15497091532735727, - "grad_norm": 2.145974636077881, - "learning_rate": 4.7417151411210715e-05, - "loss": 0.6555, - "step": 17530 - }, - { - "epoch": 0.15505931858766953, - "grad_norm": 3.989692449569702, - "learning_rate": 4.741567802353884e-05, - "loss": 0.8159, - "step": 17540 - }, - { - "epoch": 0.15514772184798176, - "grad_norm": 8.53581714630127, - "learning_rate": 4.741420463586697e-05, - "loss": 0.7689, - "step": 17550 - }, - { - "epoch": 0.155236125108294, - "grad_norm": 6.605713844299316, - "learning_rate": 4.74127312481951e-05, - "loss": 0.8069, - "step": 17560 - }, - { - "epoch": 0.15532452836860625, - "grad_norm": 7.3282999992370605, - "learning_rate": 4.7411257860523235e-05, - "loss": 0.8309, - "step": 17570 - }, - { - "epoch": 0.15541293162891848, - "grad_norm": 4.699496269226074, - "learning_rate": 4.7409784472851356e-05, - "loss": 0.8369, - "step": 17580 - }, - { - "epoch": 0.1555013348892307, - "grad_norm": 1.816676378250122, - "learning_rate": 4.740831108517949e-05, - "loss": 0.7336, - "step": 17590 - }, - { - "epoch": 0.15558973814954297, - "grad_norm": 5.680145263671875, - "learning_rate": 4.740683769750762e-05, - "loss": 0.7462, - "step": 17600 - }, - { - "epoch": 0.1556781414098552, - "grad_norm": 2.0201945304870605, - "learning_rate": 4.740536430983575e-05, - "loss": 0.9124, - "step": 17610 - }, - { - "epoch": 0.15576654467016743, - "grad_norm": 4.1975812911987305, - "learning_rate": 4.7403890922163876e-05, - "loss": 0.7377, - "step": 17620 - }, - { - "epoch": 0.1558549479304797, - "grad_norm": 8.152978897094727, - "learning_rate": 4.7402417534492005e-05, - "loss": 0.8281, - "step": 17630 - }, - { - "epoch": 0.15594335119079192, - "grad_norm": 3.530212163925171, - "learning_rate": 4.740094414682013e-05, - "loss": 0.7422, - "step": 17640 - }, - { - "epoch": 0.15603175445110415, - "grad_norm": 6.836564540863037, - "learning_rate": 4.739947075914827e-05, - "loss": 0.8205, - "step": 17650 - }, - { - "epoch": 0.1561201577114164, - "grad_norm": 6.836387634277344, - "learning_rate": 4.73979973714764e-05, - "loss": 0.9054, - "step": 17660 - }, - { - "epoch": 0.15620856097172864, - "grad_norm": 11.761429786682129, - "learning_rate": 4.7396523983804525e-05, - "loss": 0.7469, - "step": 17670 - }, - { - "epoch": 0.15629696423204087, - "grad_norm": 19.648902893066406, - "learning_rate": 4.739505059613265e-05, - "loss": 0.9581, - "step": 17680 - }, - { - "epoch": 0.15638536749235313, - "grad_norm": 5.903117656707764, - "learning_rate": 4.739357720846078e-05, - "loss": 0.7907, - "step": 17690 - }, - { - "epoch": 0.15647377075266536, - "grad_norm": 9.697010040283203, - "learning_rate": 4.739210382078891e-05, - "loss": 0.8265, - "step": 17700 - }, - { - "epoch": 0.1565621740129776, - "grad_norm": 4.563940525054932, - "learning_rate": 4.7390630433117045e-05, - "loss": 0.8707, - "step": 17710 - }, - { - "epoch": 0.15665057727328985, - "grad_norm": 4.969250202178955, - "learning_rate": 4.7389157045445174e-05, - "loss": 0.9309, - "step": 17720 - }, - { - "epoch": 0.15673898053360208, - "grad_norm": 3.407184600830078, - "learning_rate": 4.73876836577733e-05, - "loss": 0.7936, - "step": 17730 - }, - { - "epoch": 0.1568273837939143, - "grad_norm": 7.285817623138428, - "learning_rate": 4.738621027010143e-05, - "loss": 0.7602, - "step": 17740 - }, - { - "epoch": 0.15691578705422657, - "grad_norm": 2.969336748123169, - "learning_rate": 4.738473688242956e-05, - "loss": 0.6712, - "step": 17750 - }, - { - "epoch": 0.1570041903145388, - "grad_norm": 5.654608249664307, - "learning_rate": 4.738326349475769e-05, - "loss": 0.8435, - "step": 17760 - }, - { - "epoch": 0.15709259357485103, - "grad_norm": 6.064638137817383, - "learning_rate": 4.7381790107085815e-05, - "loss": 0.7975, - "step": 17770 - }, - { - "epoch": 0.1571809968351633, - "grad_norm": 8.733338356018066, - "learning_rate": 4.738031671941395e-05, - "loss": 0.7818, - "step": 17780 - }, - { - "epoch": 0.15726940009547552, - "grad_norm": 8.815160751342773, - "learning_rate": 4.737884333174208e-05, - "loss": 0.9034, - "step": 17790 - }, - { - "epoch": 0.15735780335578775, - "grad_norm": 11.449951171875, - "learning_rate": 4.737736994407021e-05, - "loss": 0.8046, - "step": 17800 - }, - { - "epoch": 0.1574462066161, - "grad_norm": 2.8350718021392822, - "learning_rate": 4.7375896556398336e-05, - "loss": 0.7466, - "step": 17810 - }, - { - "epoch": 0.15753460987641224, - "grad_norm": 6.78036642074585, - "learning_rate": 4.7374423168726464e-05, - "loss": 0.8652, - "step": 17820 - }, - { - "epoch": 0.15762301313672447, - "grad_norm": 14.450384140014648, - "learning_rate": 4.737294978105459e-05, - "loss": 0.7937, - "step": 17830 - }, - { - "epoch": 0.15771141639703673, - "grad_norm": 3.3525030612945557, - "learning_rate": 4.737147639338273e-05, - "loss": 0.7093, - "step": 17840 - }, - { - "epoch": 0.15779981965734896, - "grad_norm": 4.203586101531982, - "learning_rate": 4.737000300571085e-05, - "loss": 0.9038, - "step": 17850 - }, - { - "epoch": 0.1578882229176612, - "grad_norm": 5.063088417053223, - "learning_rate": 4.7368529618038984e-05, - "loss": 0.7865, - "step": 17860 - }, - { - "epoch": 0.15797662617797345, - "grad_norm": 3.026818037033081, - "learning_rate": 4.736705623036711e-05, - "loss": 0.9251, - "step": 17870 - }, - { - "epoch": 0.15806502943828568, - "grad_norm": 3.864360809326172, - "learning_rate": 4.736558284269524e-05, - "loss": 0.892, - "step": 17880 - }, - { - "epoch": 0.1581534326985979, - "grad_norm": 6.614564418792725, - "learning_rate": 4.736410945502337e-05, - "loss": 0.747, - "step": 17890 - }, - { - "epoch": 0.15824183595891017, - "grad_norm": 4.892355442047119, - "learning_rate": 4.7362636067351504e-05, - "loss": 0.9152, - "step": 17900 - }, - { - "epoch": 0.1583302392192224, - "grad_norm": 5.17777156829834, - "learning_rate": 4.7361162679679626e-05, - "loss": 0.8945, - "step": 17910 - }, - { - "epoch": 0.15841864247953463, - "grad_norm": 7.7891435623168945, - "learning_rate": 4.735968929200776e-05, - "loss": 1.0744, - "step": 17920 - }, - { - "epoch": 0.1585070457398469, - "grad_norm": 3.0126953125, - "learning_rate": 4.735821590433589e-05, - "loss": 0.8189, - "step": 17930 - }, - { - "epoch": 0.15859544900015912, - "grad_norm": 3.6667325496673584, - "learning_rate": 4.735674251666402e-05, - "loss": 0.8698, - "step": 17940 - }, - { - "epoch": 0.15868385226047135, - "grad_norm": 5.9046549797058105, - "learning_rate": 4.7355269128992146e-05, - "loss": 0.7714, - "step": 17950 - }, - { - "epoch": 0.1587722555207836, - "grad_norm": 6.023751735687256, - "learning_rate": 4.735379574132028e-05, - "loss": 0.7184, - "step": 17960 - }, - { - "epoch": 0.15886065878109584, - "grad_norm": 2.8186686038970947, - "learning_rate": 4.73523223536484e-05, - "loss": 0.7829, - "step": 17970 - }, - { - "epoch": 0.15894906204140807, - "grad_norm": 6.186093330383301, - "learning_rate": 4.735084896597654e-05, - "loss": 0.9277, - "step": 17980 - }, - { - "epoch": 0.15903746530172033, - "grad_norm": 11.59505558013916, - "learning_rate": 4.734937557830466e-05, - "loss": 0.7751, - "step": 17990 - }, - { - "epoch": 0.15912586856203256, - "grad_norm": 5.0415120124816895, - "learning_rate": 4.7347902190632795e-05, - "loss": 0.8904, - "step": 18000 - }, - { - "epoch": 0.15921427182234482, - "grad_norm": 3.4007201194763184, - "learning_rate": 4.734642880296092e-05, - "loss": 0.9191, - "step": 18010 - }, - { - "epoch": 0.15930267508265705, - "grad_norm": 3.271740436553955, - "learning_rate": 4.734495541528905e-05, - "loss": 0.7514, - "step": 18020 - }, - { - "epoch": 0.15939107834296928, - "grad_norm": 4.815491676330566, - "learning_rate": 4.734348202761718e-05, - "loss": 0.8331, - "step": 18030 - }, - { - "epoch": 0.15947948160328154, - "grad_norm": 6.87015962600708, - "learning_rate": 4.7342008639945315e-05, - "loss": 0.8161, - "step": 18040 - }, - { - "epoch": 0.15956788486359377, - "grad_norm": 6.4885573387146, - "learning_rate": 4.7340535252273436e-05, - "loss": 0.8169, - "step": 18050 - }, - { - "epoch": 0.159656288123906, - "grad_norm": 3.9206607341766357, - "learning_rate": 4.733906186460157e-05, - "loss": 0.8445, - "step": 18060 - }, - { - "epoch": 0.15974469138421826, - "grad_norm": 3.4145724773406982, - "learning_rate": 4.73375884769297e-05, - "loss": 0.8926, - "step": 18070 - }, - { - "epoch": 0.1598330946445305, - "grad_norm": 3.1308720111846924, - "learning_rate": 4.733611508925783e-05, - "loss": 0.7923, - "step": 18080 - }, - { - "epoch": 0.15992149790484272, - "grad_norm": 2.564180850982666, - "learning_rate": 4.7334641701585957e-05, - "loss": 0.874, - "step": 18090 - }, - { - "epoch": 0.16000990116515498, - "grad_norm": 2.432877540588379, - "learning_rate": 4.7333168313914085e-05, - "loss": 0.8792, - "step": 18100 - }, - { - "epoch": 0.1600983044254672, - "grad_norm": 5.122593879699707, - "learning_rate": 4.733169492624221e-05, - "loss": 0.7577, - "step": 18110 - }, - { - "epoch": 0.16018670768577944, - "grad_norm": 5.265462875366211, - "learning_rate": 4.733022153857035e-05, - "loss": 0.8311, - "step": 18120 - }, - { - "epoch": 0.1602751109460917, - "grad_norm": 5.594770431518555, - "learning_rate": 4.732874815089847e-05, - "loss": 0.8619, - "step": 18130 - }, - { - "epoch": 0.16036351420640393, - "grad_norm": 9.650065422058105, - "learning_rate": 4.7327274763226605e-05, - "loss": 0.8059, - "step": 18140 - }, - { - "epoch": 0.16045191746671617, - "grad_norm": 7.045647144317627, - "learning_rate": 4.7325801375554733e-05, - "loss": 0.9249, - "step": 18150 - }, - { - "epoch": 0.16054032072702842, - "grad_norm": 5.509271621704102, - "learning_rate": 4.732432798788286e-05, - "loss": 0.7212, - "step": 18160 - }, - { - "epoch": 0.16062872398734065, - "grad_norm": 7.598241806030273, - "learning_rate": 4.732285460021099e-05, - "loss": 0.7302, - "step": 18170 - }, - { - "epoch": 0.16071712724765289, - "grad_norm": 5.693346977233887, - "learning_rate": 4.7321381212539125e-05, - "loss": 0.787, - "step": 18180 - }, - { - "epoch": 0.16080553050796514, - "grad_norm": 7.060309410095215, - "learning_rate": 4.731990782486725e-05, - "loss": 0.7158, - "step": 18190 - }, - { - "epoch": 0.16089393376827738, - "grad_norm": 8.715744972229004, - "learning_rate": 4.731843443719538e-05, - "loss": 0.8629, - "step": 18200 - }, - { - "epoch": 0.1609823370285896, - "grad_norm": 5.086950302124023, - "learning_rate": 4.7316961049523504e-05, - "loss": 0.8574, - "step": 18210 - }, - { - "epoch": 0.16107074028890186, - "grad_norm": 2.8243017196655273, - "learning_rate": 4.731548766185164e-05, - "loss": 0.6888, - "step": 18220 - }, - { - "epoch": 0.1611591435492141, - "grad_norm": 2.8839316368103027, - "learning_rate": 4.731401427417977e-05, - "loss": 0.8373, - "step": 18230 - }, - { - "epoch": 0.16124754680952633, - "grad_norm": 5.097020149230957, - "learning_rate": 4.7312540886507895e-05, - "loss": 0.788, - "step": 18240 - }, - { - "epoch": 0.16133595006983859, - "grad_norm": 5.074483871459961, - "learning_rate": 4.7311067498836024e-05, - "loss": 0.7805, - "step": 18250 - }, - { - "epoch": 0.16142435333015082, - "grad_norm": 4.021812915802002, - "learning_rate": 4.730959411116416e-05, - "loss": 0.7763, - "step": 18260 - }, - { - "epoch": 0.16151275659046305, - "grad_norm": 7.153144836425781, - "learning_rate": 4.730812072349228e-05, - "loss": 0.9315, - "step": 18270 - }, - { - "epoch": 0.1616011598507753, - "grad_norm": 3.6252365112304688, - "learning_rate": 4.7306647335820416e-05, - "loss": 0.8893, - "step": 18280 - }, - { - "epoch": 0.16168956311108754, - "grad_norm": 7.570452690124512, - "learning_rate": 4.7305173948148544e-05, - "loss": 0.7164, - "step": 18290 - }, - { - "epoch": 0.16177796637139977, - "grad_norm": 3.345590591430664, - "learning_rate": 4.730370056047667e-05, - "loss": 0.8292, - "step": 18300 - }, - { - "epoch": 0.16186636963171203, - "grad_norm": 8.756311416625977, - "learning_rate": 4.73022271728048e-05, - "loss": 0.8451, - "step": 18310 - }, - { - "epoch": 0.16195477289202426, - "grad_norm": 7.167171478271484, - "learning_rate": 4.730075378513293e-05, - "loss": 0.8121, - "step": 18320 - }, - { - "epoch": 0.1620431761523365, - "grad_norm": 2.588923454284668, - "learning_rate": 4.729928039746106e-05, - "loss": 0.8351, - "step": 18330 - }, - { - "epoch": 0.16213157941264875, - "grad_norm": 5.5366997718811035, - "learning_rate": 4.729780700978919e-05, - "loss": 0.7912, - "step": 18340 - }, - { - "epoch": 0.16221998267296098, - "grad_norm": 4.38487434387207, - "learning_rate": 4.7296333622117314e-05, - "loss": 0.7493, - "step": 18350 - }, - { - "epoch": 0.1623083859332732, - "grad_norm": 4.288943767547607, - "learning_rate": 4.729486023444545e-05, - "loss": 0.8852, - "step": 18360 - }, - { - "epoch": 0.16239678919358547, - "grad_norm": 4.566148281097412, - "learning_rate": 4.729338684677358e-05, - "loss": 0.849, - "step": 18370 - }, - { - "epoch": 0.1624851924538977, - "grad_norm": 6.831437587738037, - "learning_rate": 4.7291913459101706e-05, - "loss": 0.7716, - "step": 18380 - }, - { - "epoch": 0.16257359571420993, - "grad_norm": 6.257946491241455, - "learning_rate": 4.7290440071429834e-05, - "loss": 0.8591, - "step": 18390 - }, - { - "epoch": 0.1626619989745222, - "grad_norm": 3.754155397415161, - "learning_rate": 4.728896668375797e-05, - "loss": 0.8443, - "step": 18400 - }, - { - "epoch": 0.16275040223483442, - "grad_norm": 6.450690269470215, - "learning_rate": 4.728749329608609e-05, - "loss": 0.8167, - "step": 18410 - }, - { - "epoch": 0.16283880549514665, - "grad_norm": 4.664546489715576, - "learning_rate": 4.7286019908414226e-05, - "loss": 0.7837, - "step": 18420 - }, - { - "epoch": 0.1629272087554589, - "grad_norm": 7.001297473907471, - "learning_rate": 4.7284546520742354e-05, - "loss": 0.7788, - "step": 18430 - }, - { - "epoch": 0.16301561201577114, - "grad_norm": 2.9480931758880615, - "learning_rate": 4.728307313307048e-05, - "loss": 0.8633, - "step": 18440 - }, - { - "epoch": 0.16310401527608337, - "grad_norm": 4.539743423461914, - "learning_rate": 4.728159974539861e-05, - "loss": 0.6939, - "step": 18450 - }, - { - "epoch": 0.16319241853639563, - "grad_norm": 1.7249478101730347, - "learning_rate": 4.728012635772674e-05, - "loss": 0.7591, - "step": 18460 - }, - { - "epoch": 0.16328082179670786, - "grad_norm": 4.965416431427002, - "learning_rate": 4.727865297005487e-05, - "loss": 0.8193, - "step": 18470 - }, - { - "epoch": 0.1633692250570201, - "grad_norm": 3.3861422538757324, - "learning_rate": 4.7277179582383e-05, - "loss": 0.7937, - "step": 18480 - }, - { - "epoch": 0.16345762831733235, - "grad_norm": 4.316111087799072, - "learning_rate": 4.7275706194711125e-05, - "loss": 0.8458, - "step": 18490 - }, - { - "epoch": 0.16354603157764458, - "grad_norm": 3.774136543273926, - "learning_rate": 4.727423280703926e-05, - "loss": 0.7364, - "step": 18500 - }, - { - "epoch": 0.1636344348379568, - "grad_norm": 4.713778972625732, - "learning_rate": 4.727275941936739e-05, - "loss": 0.8164, - "step": 18510 - }, - { - "epoch": 0.16372283809826907, - "grad_norm": 3.9100100994110107, - "learning_rate": 4.7271286031695516e-05, - "loss": 0.8595, - "step": 18520 - }, - { - "epoch": 0.1638112413585813, - "grad_norm": 4.755215167999268, - "learning_rate": 4.7269812644023645e-05, - "loss": 0.7251, - "step": 18530 - }, - { - "epoch": 0.16389964461889356, - "grad_norm": 5.091159820556641, - "learning_rate": 4.726833925635178e-05, - "loss": 0.8085, - "step": 18540 - }, - { - "epoch": 0.1639880478792058, - "grad_norm": 4.025550365447998, - "learning_rate": 4.72668658686799e-05, - "loss": 0.9259, - "step": 18550 - }, - { - "epoch": 0.16407645113951802, - "grad_norm": 4.209859848022461, - "learning_rate": 4.7265392481008037e-05, - "loss": 0.7756, - "step": 18560 - }, - { - "epoch": 0.16416485439983028, - "grad_norm": 10.84984302520752, - "learning_rate": 4.7263919093336165e-05, - "loss": 0.8719, - "step": 18570 - }, - { - "epoch": 0.1642532576601425, - "grad_norm": 2.2355659008026123, - "learning_rate": 4.726244570566429e-05, - "loss": 0.8152, - "step": 18580 - }, - { - "epoch": 0.16434166092045474, - "grad_norm": 8.716652870178223, - "learning_rate": 4.726097231799242e-05, - "loss": 0.97, - "step": 18590 - }, - { - "epoch": 0.164430064180767, - "grad_norm": 2.814476728439331, - "learning_rate": 4.725949893032055e-05, - "loss": 0.7929, - "step": 18600 - }, - { - "epoch": 0.16451846744107923, - "grad_norm": 4.9129414558410645, - "learning_rate": 4.725802554264868e-05, - "loss": 0.8613, - "step": 18610 - }, - { - "epoch": 0.16460687070139146, - "grad_norm": 3.8735432624816895, - "learning_rate": 4.7256552154976814e-05, - "loss": 0.788, - "step": 18620 - }, - { - "epoch": 0.16469527396170372, - "grad_norm": 2.8175761699676514, - "learning_rate": 4.725507876730494e-05, - "loss": 0.7751, - "step": 18630 - }, - { - "epoch": 0.16478367722201595, - "grad_norm": 6.7121500968933105, - "learning_rate": 4.725360537963307e-05, - "loss": 0.8249, - "step": 18640 - }, - { - "epoch": 0.16487208048232818, - "grad_norm": 7.731828212738037, - "learning_rate": 4.72521319919612e-05, - "loss": 0.792, - "step": 18650 - }, - { - "epoch": 0.16496048374264044, - "grad_norm": 4.961273670196533, - "learning_rate": 4.725065860428933e-05, - "loss": 0.7956, - "step": 18660 - }, - { - "epoch": 0.16504888700295267, - "grad_norm": 4.998683929443359, - "learning_rate": 4.724918521661746e-05, - "loss": 0.793, - "step": 18670 - }, - { - "epoch": 0.1651372902632649, - "grad_norm": 6.564632892608643, - "learning_rate": 4.7247711828945584e-05, - "loss": 0.7985, - "step": 18680 - }, - { - "epoch": 0.16522569352357716, - "grad_norm": 4.772511005401611, - "learning_rate": 4.724623844127372e-05, - "loss": 0.853, - "step": 18690 - }, - { - "epoch": 0.1653140967838894, - "grad_norm": 2.7908151149749756, - "learning_rate": 4.724476505360185e-05, - "loss": 0.9016, - "step": 18700 - }, - { - "epoch": 0.16540250004420162, - "grad_norm": 3.49704909324646, - "learning_rate": 4.7243291665929975e-05, - "loss": 0.751, - "step": 18710 - }, - { - "epoch": 0.16549090330451388, - "grad_norm": 8.343111038208008, - "learning_rate": 4.7241818278258104e-05, - "loss": 0.6942, - "step": 18720 - }, - { - "epoch": 0.1655793065648261, - "grad_norm": 10.985702514648438, - "learning_rate": 4.724034489058624e-05, - "loss": 0.8471, - "step": 18730 - }, - { - "epoch": 0.16566770982513834, - "grad_norm": 8.541181564331055, - "learning_rate": 4.723887150291436e-05, - "loss": 0.8589, - "step": 18740 - }, - { - "epoch": 0.1657561130854506, - "grad_norm": 3.302056074142456, - "learning_rate": 4.7237398115242496e-05, - "loss": 0.7596, - "step": 18750 - }, - { - "epoch": 0.16584451634576283, - "grad_norm": 2.5116047859191895, - "learning_rate": 4.7235924727570624e-05, - "loss": 0.8419, - "step": 18760 - }, - { - "epoch": 0.16593291960607506, - "grad_norm": 3.3291523456573486, - "learning_rate": 4.723445133989875e-05, - "loss": 0.7754, - "step": 18770 - }, - { - "epoch": 0.16602132286638732, - "grad_norm": 4.486542701721191, - "learning_rate": 4.723297795222688e-05, - "loss": 0.7598, - "step": 18780 - }, - { - "epoch": 0.16610972612669955, - "grad_norm": 3.321672201156616, - "learning_rate": 4.7231504564555016e-05, - "loss": 0.616, - "step": 18790 - }, - { - "epoch": 0.16619812938701178, - "grad_norm": 3.0971567630767822, - "learning_rate": 4.723003117688314e-05, - "loss": 0.7112, - "step": 18800 - }, - { - "epoch": 0.16628653264732404, - "grad_norm": 3.232220411300659, - "learning_rate": 4.722855778921127e-05, - "loss": 0.815, - "step": 18810 - }, - { - "epoch": 0.16637493590763627, - "grad_norm": 4.4582295417785645, - "learning_rate": 4.7227084401539394e-05, - "loss": 0.7364, - "step": 18820 - }, - { - "epoch": 0.1664633391679485, - "grad_norm": 3.4270033836364746, - "learning_rate": 4.722561101386753e-05, - "loss": 0.6765, - "step": 18830 - }, - { - "epoch": 0.16655174242826076, - "grad_norm": 5.478305816650391, - "learning_rate": 4.722413762619566e-05, - "loss": 0.8738, - "step": 18840 - }, - { - "epoch": 0.166640145688573, - "grad_norm": 4.589667797088623, - "learning_rate": 4.7222664238523786e-05, - "loss": 0.7627, - "step": 18850 - }, - { - "epoch": 0.16672854894888522, - "grad_norm": 3.996263265609741, - "learning_rate": 4.7221190850851914e-05, - "loss": 0.6948, - "step": 18860 - }, - { - "epoch": 0.16681695220919748, - "grad_norm": 3.293722152709961, - "learning_rate": 4.721971746318005e-05, - "loss": 0.8482, - "step": 18870 - }, - { - "epoch": 0.16690535546950971, - "grad_norm": 9.512335777282715, - "learning_rate": 4.721824407550817e-05, - "loss": 0.7064, - "step": 18880 - }, - { - "epoch": 0.16699375872982195, - "grad_norm": 5.593800067901611, - "learning_rate": 4.7216770687836306e-05, - "loss": 0.843, - "step": 18890 - }, - { - "epoch": 0.1670821619901342, - "grad_norm": 3.658989191055298, - "learning_rate": 4.7215297300164435e-05, - "loss": 0.8077, - "step": 18900 - }, - { - "epoch": 0.16717056525044643, - "grad_norm": 7.332565784454346, - "learning_rate": 4.721382391249256e-05, - "loss": 0.7967, - "step": 18910 - }, - { - "epoch": 0.16725896851075867, - "grad_norm": 6.507866382598877, - "learning_rate": 4.721235052482069e-05, - "loss": 0.8039, - "step": 18920 - }, - { - "epoch": 0.16734737177107092, - "grad_norm": 10.709056854248047, - "learning_rate": 4.721087713714882e-05, - "loss": 0.751, - "step": 18930 - }, - { - "epoch": 0.16743577503138315, - "grad_norm": 7.497487545013428, - "learning_rate": 4.720940374947695e-05, - "loss": 0.9073, - "step": 18940 - }, - { - "epoch": 0.16752417829169539, - "grad_norm": 6.352433204650879, - "learning_rate": 4.720793036180508e-05, - "loss": 0.8455, - "step": 18950 - }, - { - "epoch": 0.16761258155200764, - "grad_norm": 3.689246892929077, - "learning_rate": 4.7206456974133205e-05, - "loss": 0.8592, - "step": 18960 - }, - { - "epoch": 0.16770098481231988, - "grad_norm": 3.891657829284668, - "learning_rate": 4.720498358646134e-05, - "loss": 0.8722, - "step": 18970 - }, - { - "epoch": 0.1677893880726321, - "grad_norm": 4.450270652770996, - "learning_rate": 4.720351019878947e-05, - "loss": 0.7952, - "step": 18980 - }, - { - "epoch": 0.16787779133294436, - "grad_norm": 4.3971781730651855, - "learning_rate": 4.7202036811117596e-05, - "loss": 0.7979, - "step": 18990 - }, - { - "epoch": 0.1679661945932566, - "grad_norm": 4.213810920715332, - "learning_rate": 4.7200563423445725e-05, - "loss": 0.7506, - "step": 19000 - }, - { - "epoch": 0.16805459785356883, - "grad_norm": 4.515845775604248, - "learning_rate": 4.719909003577386e-05, - "loss": 0.8104, - "step": 19010 - }, - { - "epoch": 0.16814300111388109, - "grad_norm": 7.110860824584961, - "learning_rate": 4.719761664810198e-05, - "loss": 0.7089, - "step": 19020 - }, - { - "epoch": 0.16823140437419332, - "grad_norm": 4.53788948059082, - "learning_rate": 4.719614326043012e-05, - "loss": 0.8878, - "step": 19030 - }, - { - "epoch": 0.16831980763450555, - "grad_norm": 8.444923400878906, - "learning_rate": 4.719466987275824e-05, - "loss": 0.7488, - "step": 19040 - }, - { - "epoch": 0.1684082108948178, - "grad_norm": 3.3344855308532715, - "learning_rate": 4.719319648508637e-05, - "loss": 0.8422, - "step": 19050 - }, - { - "epoch": 0.16849661415513004, - "grad_norm": 4.313567161560059, - "learning_rate": 4.71917230974145e-05, - "loss": 0.772, - "step": 19060 - }, - { - "epoch": 0.1685850174154423, - "grad_norm": 7.400096416473389, - "learning_rate": 4.719024970974263e-05, - "loss": 0.7867, - "step": 19070 - }, - { - "epoch": 0.16867342067575453, - "grad_norm": 7.661045074462891, - "learning_rate": 4.718877632207076e-05, - "loss": 0.7241, - "step": 19080 - }, - { - "epoch": 0.16876182393606676, - "grad_norm": 4.5807271003723145, - "learning_rate": 4.7187302934398894e-05, - "loss": 0.7532, - "step": 19090 - }, - { - "epoch": 0.16885022719637902, - "grad_norm": 5.509703636169434, - "learning_rate": 4.7185829546727015e-05, - "loss": 0.6222, - "step": 19100 - }, - { - "epoch": 0.16893863045669125, - "grad_norm": 7.484439373016357, - "learning_rate": 4.718435615905515e-05, - "loss": 0.7193, - "step": 19110 - }, - { - "epoch": 0.16902703371700348, - "grad_norm": 7.234420299530029, - "learning_rate": 4.718288277138328e-05, - "loss": 0.9654, - "step": 19120 - }, - { - "epoch": 0.16911543697731574, - "grad_norm": 5.691709041595459, - "learning_rate": 4.718140938371141e-05, - "loss": 0.7726, - "step": 19130 - }, - { - "epoch": 0.16920384023762797, - "grad_norm": 5.517917156219482, - "learning_rate": 4.7179935996039535e-05, - "loss": 0.9405, - "step": 19140 - }, - { - "epoch": 0.1692922434979402, - "grad_norm": 4.151499271392822, - "learning_rate": 4.7178462608367664e-05, - "loss": 0.7854, - "step": 19150 - }, - { - "epoch": 0.16938064675825246, - "grad_norm": 8.64465618133545, - "learning_rate": 4.717698922069579e-05, - "loss": 0.9154, - "step": 19160 - }, - { - "epoch": 0.1694690500185647, - "grad_norm": 4.667221546173096, - "learning_rate": 4.717551583302393e-05, - "loss": 0.8714, - "step": 19170 - }, - { - "epoch": 0.16955745327887692, - "grad_norm": 2.7628471851348877, - "learning_rate": 4.717404244535205e-05, - "loss": 0.8054, - "step": 19180 - }, - { - "epoch": 0.16964585653918918, - "grad_norm": 2.8770718574523926, - "learning_rate": 4.7172569057680184e-05, - "loss": 0.7926, - "step": 19190 - }, - { - "epoch": 0.1697342597995014, - "grad_norm": 11.768014907836914, - "learning_rate": 4.717109567000831e-05, - "loss": 0.8588, - "step": 19200 - }, - { - "epoch": 0.16982266305981364, - "grad_norm": 6.627838611602783, - "learning_rate": 4.716962228233644e-05, - "loss": 0.7746, - "step": 19210 - }, - { - "epoch": 0.1699110663201259, - "grad_norm": 4.2306294441223145, - "learning_rate": 4.716814889466457e-05, - "loss": 0.6987, - "step": 19220 - }, - { - "epoch": 0.16999946958043813, - "grad_norm": 2.051795244216919, - "learning_rate": 4.7166675506992704e-05, - "loss": 0.7172, - "step": 19230 - }, - { - "epoch": 0.17008787284075036, - "grad_norm": 3.473698377609253, - "learning_rate": 4.7165202119320826e-05, - "loss": 0.7591, - "step": 19240 - }, - { - "epoch": 0.17017627610106262, - "grad_norm": 3.525426149368286, - "learning_rate": 4.716372873164896e-05, - "loss": 0.7131, - "step": 19250 - }, - { - "epoch": 0.17026467936137485, - "grad_norm": 1.7267205715179443, - "learning_rate": 4.716225534397709e-05, - "loss": 0.6918, - "step": 19260 - }, - { - "epoch": 0.17035308262168708, - "grad_norm": 11.006839752197266, - "learning_rate": 4.716078195630522e-05, - "loss": 0.8123, - "step": 19270 - }, - { - "epoch": 0.17044148588199934, - "grad_norm": 5.82366943359375, - "learning_rate": 4.7159308568633346e-05, - "loss": 0.749, - "step": 19280 - }, - { - "epoch": 0.17052988914231157, - "grad_norm": 6.12000036239624, - "learning_rate": 4.7157835180961474e-05, - "loss": 0.6957, - "step": 19290 - }, - { - "epoch": 0.1706182924026238, - "grad_norm": 8.563779830932617, - "learning_rate": 4.71563617932896e-05, - "loss": 0.7675, - "step": 19300 - }, - { - "epoch": 0.17070669566293606, - "grad_norm": 4.752929210662842, - "learning_rate": 4.715488840561774e-05, - "loss": 0.7587, - "step": 19310 - }, - { - "epoch": 0.1707950989232483, - "grad_norm": 1.848655343055725, - "learning_rate": 4.715341501794586e-05, - "loss": 0.6361, - "step": 19320 - }, - { - "epoch": 0.17088350218356052, - "grad_norm": 11.473823547363281, - "learning_rate": 4.7151941630273994e-05, - "loss": 0.8394, - "step": 19330 - }, - { - "epoch": 0.17097190544387278, - "grad_norm": 5.776627540588379, - "learning_rate": 4.715046824260212e-05, - "loss": 0.7531, - "step": 19340 - }, - { - "epoch": 0.171060308704185, - "grad_norm": 3.338092088699341, - "learning_rate": 4.714899485493025e-05, - "loss": 0.8278, - "step": 19350 - }, - { - "epoch": 0.17114871196449724, - "grad_norm": 3.929267406463623, - "learning_rate": 4.714752146725838e-05, - "loss": 0.7747, - "step": 19360 - }, - { - "epoch": 0.1712371152248095, - "grad_norm": 11.310409545898438, - "learning_rate": 4.7146048079586515e-05, - "loss": 0.7816, - "step": 19370 - }, - { - "epoch": 0.17132551848512173, - "grad_norm": 3.5103700160980225, - "learning_rate": 4.7144574691914636e-05, - "loss": 0.8185, - "step": 19380 - }, - { - "epoch": 0.17141392174543396, - "grad_norm": 3.7547149658203125, - "learning_rate": 4.714310130424277e-05, - "loss": 0.731, - "step": 19390 - }, - { - "epoch": 0.17150232500574622, - "grad_norm": 4.463881969451904, - "learning_rate": 4.71416279165709e-05, - "loss": 0.7501, - "step": 19400 - }, - { - "epoch": 0.17159072826605845, - "grad_norm": 4.538525104522705, - "learning_rate": 4.714015452889903e-05, - "loss": 0.9555, - "step": 19410 - }, - { - "epoch": 0.17167913152637068, - "grad_norm": 5.343875408172607, - "learning_rate": 4.7138681141227156e-05, - "loss": 0.8399, - "step": 19420 - }, - { - "epoch": 0.17176753478668294, - "grad_norm": 5.001431941986084, - "learning_rate": 4.7137207753555285e-05, - "loss": 0.8188, - "step": 19430 - }, - { - "epoch": 0.17185593804699517, - "grad_norm": 5.407555103302002, - "learning_rate": 4.713573436588341e-05, - "loss": 0.6176, - "step": 19440 - }, - { - "epoch": 0.1719443413073074, - "grad_norm": 3.2855517864227295, - "learning_rate": 4.713426097821155e-05, - "loss": 0.8608, - "step": 19450 - }, - { - "epoch": 0.17203274456761966, - "grad_norm": 10.408510208129883, - "learning_rate": 4.7132787590539677e-05, - "loss": 0.7515, - "step": 19460 - }, - { - "epoch": 0.1721211478279319, - "grad_norm": 5.119137763977051, - "learning_rate": 4.7131314202867805e-05, - "loss": 0.8096, - "step": 19470 - }, - { - "epoch": 0.17220955108824412, - "grad_norm": 4.531383991241455, - "learning_rate": 4.712984081519593e-05, - "loss": 0.8578, - "step": 19480 - }, - { - "epoch": 0.17229795434855638, - "grad_norm": 5.950822830200195, - "learning_rate": 4.712836742752406e-05, - "loss": 0.8478, - "step": 19490 - }, - { - "epoch": 0.1723863576088686, - "grad_norm": 8.27846908569336, - "learning_rate": 4.712689403985219e-05, - "loss": 0.787, - "step": 19500 - }, - { - "epoch": 0.17247476086918084, - "grad_norm": 4.818996429443359, - "learning_rate": 4.712542065218032e-05, - "loss": 0.7874, - "step": 19510 - }, - { - "epoch": 0.1725631641294931, - "grad_norm": 8.866082191467285, - "learning_rate": 4.7123947264508453e-05, - "loss": 0.8239, - "step": 19520 - }, - { - "epoch": 0.17265156738980533, - "grad_norm": 5.025813579559326, - "learning_rate": 4.712247387683658e-05, - "loss": 0.7968, - "step": 19530 - }, - { - "epoch": 0.17273997065011756, - "grad_norm": 3.0801727771759033, - "learning_rate": 4.712100048916471e-05, - "loss": 0.7977, - "step": 19540 - }, - { - "epoch": 0.17282837391042982, - "grad_norm": 2.6314749717712402, - "learning_rate": 4.711952710149284e-05, - "loss": 0.8456, - "step": 19550 - }, - { - "epoch": 0.17291677717074205, - "grad_norm": 7.432890892028809, - "learning_rate": 4.711805371382097e-05, - "loss": 0.8546, - "step": 19560 - }, - { - "epoch": 0.17300518043105428, - "grad_norm": 7.585647106170654, - "learning_rate": 4.7116580326149095e-05, - "loss": 0.8915, - "step": 19570 - }, - { - "epoch": 0.17309358369136654, - "grad_norm": 5.4618988037109375, - "learning_rate": 4.711510693847723e-05, - "loss": 0.7779, - "step": 19580 - }, - { - "epoch": 0.17318198695167877, - "grad_norm": 4.218719959259033, - "learning_rate": 4.711363355080536e-05, - "loss": 0.8346, - "step": 19590 - }, - { - "epoch": 0.173270390211991, - "grad_norm": 5.693262577056885, - "learning_rate": 4.711216016313349e-05, - "loss": 0.8077, - "step": 19600 - }, - { - "epoch": 0.17335879347230326, - "grad_norm": 3.136305332183838, - "learning_rate": 4.7110686775461615e-05, - "loss": 0.7089, - "step": 19610 - }, - { - "epoch": 0.1734471967326155, - "grad_norm": 10.014686584472656, - "learning_rate": 4.7109213387789744e-05, - "loss": 0.8659, - "step": 19620 - }, - { - "epoch": 0.17353559999292775, - "grad_norm": 4.309294700622559, - "learning_rate": 4.710774000011787e-05, - "loss": 0.851, - "step": 19630 - }, - { - "epoch": 0.17362400325323998, - "grad_norm": 3.900099277496338, - "learning_rate": 4.710626661244601e-05, - "loss": 0.9194, - "step": 19640 - }, - { - "epoch": 0.17371240651355221, - "grad_norm": 8.823202133178711, - "learning_rate": 4.710479322477413e-05, - "loss": 0.7955, - "step": 19650 - }, - { - "epoch": 0.17380080977386447, - "grad_norm": 3.3904662132263184, - "learning_rate": 4.7103319837102264e-05, - "loss": 0.8078, - "step": 19660 - }, - { - "epoch": 0.1738892130341767, - "grad_norm": 2.620433807373047, - "learning_rate": 4.710184644943039e-05, - "loss": 0.687, - "step": 19670 - }, - { - "epoch": 0.17397761629448893, - "grad_norm": 6.136517524719238, - "learning_rate": 4.710037306175852e-05, - "loss": 0.7462, - "step": 19680 - }, - { - "epoch": 0.1740660195548012, - "grad_norm": 9.900436401367188, - "learning_rate": 4.709889967408665e-05, - "loss": 0.8872, - "step": 19690 - }, - { - "epoch": 0.17415442281511342, - "grad_norm": 3.0412752628326416, - "learning_rate": 4.7097426286414784e-05, - "loss": 0.7468, - "step": 19700 - }, - { - "epoch": 0.17424282607542566, - "grad_norm": 7.812278747558594, - "learning_rate": 4.7095952898742906e-05, - "loss": 0.7556, - "step": 19710 - }, - { - "epoch": 0.17433122933573791, - "grad_norm": 4.977499485015869, - "learning_rate": 4.709447951107104e-05, - "loss": 0.9045, - "step": 19720 - }, - { - "epoch": 0.17441963259605014, - "grad_norm": 7.382686138153076, - "learning_rate": 4.709300612339917e-05, - "loss": 0.9253, - "step": 19730 - }, - { - "epoch": 0.17450803585636238, - "grad_norm": 2.005552291870117, - "learning_rate": 4.70915327357273e-05, - "loss": 0.7806, - "step": 19740 - }, - { - "epoch": 0.17459643911667463, - "grad_norm": 5.624714374542236, - "learning_rate": 4.7090059348055426e-05, - "loss": 0.7612, - "step": 19750 - }, - { - "epoch": 0.17468484237698687, - "grad_norm": 5.911917209625244, - "learning_rate": 4.7088585960383554e-05, - "loss": 0.8858, - "step": 19760 - }, - { - "epoch": 0.1747732456372991, - "grad_norm": 4.969256401062012, - "learning_rate": 4.708711257271168e-05, - "loss": 0.8302, - "step": 19770 - }, - { - "epoch": 0.17486164889761135, - "grad_norm": 6.530728816986084, - "learning_rate": 4.708563918503982e-05, - "loss": 0.7979, - "step": 19780 - }, - { - "epoch": 0.17495005215792359, - "grad_norm": 2.1959164142608643, - "learning_rate": 4.708416579736794e-05, - "loss": 0.8566, - "step": 19790 - }, - { - "epoch": 0.17503845541823582, - "grad_norm": 5.137004852294922, - "learning_rate": 4.7082692409696074e-05, - "loss": 0.7901, - "step": 19800 - }, - { - "epoch": 0.17512685867854808, - "grad_norm": 2.6731183528900146, - "learning_rate": 4.70812190220242e-05, - "loss": 0.7823, - "step": 19810 - }, - { - "epoch": 0.1752152619388603, - "grad_norm": 4.632927417755127, - "learning_rate": 4.707974563435233e-05, - "loss": 0.765, - "step": 19820 - }, - { - "epoch": 0.17530366519917254, - "grad_norm": 8.491394996643066, - "learning_rate": 4.707827224668046e-05, - "loss": 0.8173, - "step": 19830 - }, - { - "epoch": 0.1753920684594848, - "grad_norm": 2.1966021060943604, - "learning_rate": 4.7076798859008595e-05, - "loss": 0.8613, - "step": 19840 - }, - { - "epoch": 0.17548047171979703, - "grad_norm": 6.492778778076172, - "learning_rate": 4.7075325471336716e-05, - "loss": 0.8378, - "step": 19850 - }, - { - "epoch": 0.17556887498010926, - "grad_norm": 1.9610486030578613, - "learning_rate": 4.707385208366485e-05, - "loss": 0.7533, - "step": 19860 - }, - { - "epoch": 0.17565727824042152, - "grad_norm": 4.732749938964844, - "learning_rate": 4.707237869599297e-05, - "loss": 0.8484, - "step": 19870 - }, - { - "epoch": 0.17574568150073375, - "grad_norm": 4.292774677276611, - "learning_rate": 4.707090530832111e-05, - "loss": 0.8442, - "step": 19880 - }, - { - "epoch": 0.17583408476104598, - "grad_norm": 3.8512213230133057, - "learning_rate": 4.7069431920649236e-05, - "loss": 0.7514, - "step": 19890 - }, - { - "epoch": 0.17592248802135824, - "grad_norm": 5.678794860839844, - "learning_rate": 4.7067958532977365e-05, - "loss": 0.8178, - "step": 19900 - }, - { - "epoch": 0.17601089128167047, - "grad_norm": 6.57100772857666, - "learning_rate": 4.706648514530549e-05, - "loss": 0.7885, - "step": 19910 - }, - { - "epoch": 0.1760992945419827, - "grad_norm": 8.180868148803711, - "learning_rate": 4.706501175763363e-05, - "loss": 0.7123, - "step": 19920 - }, - { - "epoch": 0.17618769780229496, - "grad_norm": 4.43673849105835, - "learning_rate": 4.706353836996175e-05, - "loss": 0.8278, - "step": 19930 - }, - { - "epoch": 0.1762761010626072, - "grad_norm": 5.025807857513428, - "learning_rate": 4.7062064982289885e-05, - "loss": 0.7896, - "step": 19940 - }, - { - "epoch": 0.17636450432291942, - "grad_norm": 6.425665378570557, - "learning_rate": 4.706059159461801e-05, - "loss": 0.8596, - "step": 19950 - }, - { - "epoch": 0.17645290758323168, - "grad_norm": 5.0455241203308105, - "learning_rate": 4.705911820694614e-05, - "loss": 0.7195, - "step": 19960 - }, - { - "epoch": 0.1765413108435439, - "grad_norm": 5.214970111846924, - "learning_rate": 4.705764481927427e-05, - "loss": 0.7317, - "step": 19970 - }, - { - "epoch": 0.17662971410385614, - "grad_norm": 5.591923713684082, - "learning_rate": 4.70561714316024e-05, - "loss": 0.8803, - "step": 19980 - }, - { - "epoch": 0.1767181173641684, - "grad_norm": 6.58145809173584, - "learning_rate": 4.705469804393053e-05, - "loss": 0.8799, - "step": 19990 - }, - { - "epoch": 0.17680652062448063, - "grad_norm": 6.532370567321777, - "learning_rate": 4.705322465625866e-05, - "loss": 0.8206, - "step": 20000 - }, - { - "epoch": 0.17689492388479286, - "grad_norm": 7.6214518547058105, - "learning_rate": 4.7051751268586783e-05, - "loss": 0.7727, - "step": 20010 - }, - { - "epoch": 0.17698332714510512, - "grad_norm": 3.491711139678955, - "learning_rate": 4.705027788091492e-05, - "loss": 0.6936, - "step": 20020 - }, - { - "epoch": 0.17707173040541735, - "grad_norm": 2.3454463481903076, - "learning_rate": 4.704880449324305e-05, - "loss": 0.7526, - "step": 20030 - }, - { - "epoch": 0.17716013366572958, - "grad_norm": 2.3436224460601807, - "learning_rate": 4.7047331105571175e-05, - "loss": 0.7448, - "step": 20040 - }, - { - "epoch": 0.17724853692604184, - "grad_norm": 4.434281349182129, - "learning_rate": 4.7045857717899304e-05, - "loss": 0.7042, - "step": 20050 - }, - { - "epoch": 0.17733694018635407, - "grad_norm": 3.948333501815796, - "learning_rate": 4.704438433022744e-05, - "loss": 0.7103, - "step": 20060 - }, - { - "epoch": 0.1774253434466663, - "grad_norm": 4.56276798248291, - "learning_rate": 4.704291094255556e-05, - "loss": 0.9007, - "step": 20070 - }, - { - "epoch": 0.17751374670697856, - "grad_norm": 3.5347900390625, - "learning_rate": 4.7041437554883695e-05, - "loss": 0.6617, - "step": 20080 - }, - { - "epoch": 0.1776021499672908, - "grad_norm": 3.578092575073242, - "learning_rate": 4.703996416721182e-05, - "loss": 0.7879, - "step": 20090 - }, - { - "epoch": 0.17769055322760302, - "grad_norm": 5.835831165313721, - "learning_rate": 4.703849077953995e-05, - "loss": 0.8649, - "step": 20100 - }, - { - "epoch": 0.17777895648791528, - "grad_norm": 3.1239800453186035, - "learning_rate": 4.703701739186808e-05, - "loss": 0.6506, - "step": 20110 - }, - { - "epoch": 0.1778673597482275, - "grad_norm": 10.112980842590332, - "learning_rate": 4.703554400419621e-05, - "loss": 0.7838, - "step": 20120 - }, - { - "epoch": 0.17795576300853974, - "grad_norm": 4.310737133026123, - "learning_rate": 4.703407061652434e-05, - "loss": 0.8217, - "step": 20130 - }, - { - "epoch": 0.178044166268852, - "grad_norm": 2.8160488605499268, - "learning_rate": 4.703259722885247e-05, - "loss": 0.7248, - "step": 20140 - }, - { - "epoch": 0.17813256952916423, - "grad_norm": 13.706145286560059, - "learning_rate": 4.7031123841180594e-05, - "loss": 1.007, - "step": 20150 - }, - { - "epoch": 0.1782209727894765, - "grad_norm": 8.242176055908203, - "learning_rate": 4.702965045350873e-05, - "loss": 0.7377, - "step": 20160 - }, - { - "epoch": 0.17830937604978872, - "grad_norm": 4.9279046058654785, - "learning_rate": 4.702817706583686e-05, - "loss": 0.8589, - "step": 20170 - }, - { - "epoch": 0.17839777931010095, - "grad_norm": 4.710366725921631, - "learning_rate": 4.7026703678164986e-05, - "loss": 0.7259, - "step": 20180 - }, - { - "epoch": 0.1784861825704132, - "grad_norm": 2.227062225341797, - "learning_rate": 4.7025230290493114e-05, - "loss": 0.7803, - "step": 20190 - }, - { - "epoch": 0.17857458583072544, - "grad_norm": 8.307478904724121, - "learning_rate": 4.702375690282125e-05, - "loss": 0.8592, - "step": 20200 - }, - { - "epoch": 0.17866298909103767, - "grad_norm": 3.467332363128662, - "learning_rate": 4.702228351514937e-05, - "loss": 0.6721, - "step": 20210 - }, - { - "epoch": 0.17875139235134993, - "grad_norm": 4.272871494293213, - "learning_rate": 4.7020810127477506e-05, - "loss": 0.6416, - "step": 20220 - }, - { - "epoch": 0.17883979561166216, - "grad_norm": 6.130920886993408, - "learning_rate": 4.701933673980563e-05, - "loss": 0.7404, - "step": 20230 - }, - { - "epoch": 0.1789281988719744, - "grad_norm": 3.2900898456573486, - "learning_rate": 4.701786335213376e-05, - "loss": 0.7589, - "step": 20240 - }, - { - "epoch": 0.17901660213228665, - "grad_norm": 6.0761542320251465, - "learning_rate": 4.701638996446189e-05, - "loss": 0.7381, - "step": 20250 - }, - { - "epoch": 0.17910500539259888, - "grad_norm": 6.198101997375488, - "learning_rate": 4.701491657679002e-05, - "loss": 0.8172, - "step": 20260 - }, - { - "epoch": 0.1791934086529111, - "grad_norm": 5.321105480194092, - "learning_rate": 4.701344318911815e-05, - "loss": 0.8677, - "step": 20270 - }, - { - "epoch": 0.17928181191322337, - "grad_norm": 7.723829746246338, - "learning_rate": 4.701196980144628e-05, - "loss": 0.8929, - "step": 20280 - }, - { - "epoch": 0.1793702151735356, - "grad_norm": 3.7290022373199463, - "learning_rate": 4.7010496413774404e-05, - "loss": 0.7007, - "step": 20290 - }, - { - "epoch": 0.17945861843384783, - "grad_norm": 9.33781623840332, - "learning_rate": 4.700902302610254e-05, - "loss": 0.8504, - "step": 20300 - }, - { - "epoch": 0.1795470216941601, - "grad_norm": 4.633574485778809, - "learning_rate": 4.700754963843067e-05, - "loss": 0.9296, - "step": 20310 - }, - { - "epoch": 0.17963542495447232, - "grad_norm": 4.936581611633301, - "learning_rate": 4.7006076250758796e-05, - "loss": 0.795, - "step": 20320 - }, - { - "epoch": 0.17972382821478455, - "grad_norm": 7.54608154296875, - "learning_rate": 4.7004602863086925e-05, - "loss": 0.7732, - "step": 20330 - }, - { - "epoch": 0.1798122314750968, - "grad_norm": 4.504227161407471, - "learning_rate": 4.700312947541505e-05, - "loss": 0.8954, - "step": 20340 - }, - { - "epoch": 0.17990063473540904, - "grad_norm": 5.178501605987549, - "learning_rate": 4.700165608774318e-05, - "loss": 0.7758, - "step": 20350 - }, - { - "epoch": 0.17998903799572127, - "grad_norm": 3.549858331680298, - "learning_rate": 4.7000182700071317e-05, - "loss": 0.6726, - "step": 20360 - }, - { - "epoch": 0.18007744125603353, - "grad_norm": 6.557262420654297, - "learning_rate": 4.6998709312399445e-05, - "loss": 0.8728, - "step": 20370 - }, - { - "epoch": 0.18016584451634576, - "grad_norm": 5.670749664306641, - "learning_rate": 4.699723592472757e-05, - "loss": 0.7527, - "step": 20380 - }, - { - "epoch": 0.180254247776658, - "grad_norm": 6.3349833488464355, - "learning_rate": 4.69957625370557e-05, - "loss": 0.8495, - "step": 20390 - }, - { - "epoch": 0.18034265103697025, - "grad_norm": 3.4349277019500732, - "learning_rate": 4.699428914938383e-05, - "loss": 0.7331, - "step": 20400 - }, - { - "epoch": 0.18043105429728248, - "grad_norm": 7.19025182723999, - "learning_rate": 4.699281576171196e-05, - "loss": 0.7024, - "step": 20410 - }, - { - "epoch": 0.18051945755759471, - "grad_norm": 2.892963409423828, - "learning_rate": 4.6991342374040093e-05, - "loss": 0.862, - "step": 20420 - }, - { - "epoch": 0.18060786081790697, - "grad_norm": 6.494869232177734, - "learning_rate": 4.698986898636822e-05, - "loss": 0.7154, - "step": 20430 - }, - { - "epoch": 0.1806962640782192, - "grad_norm": 4.208085536956787, - "learning_rate": 4.698839559869635e-05, - "loss": 0.8957, - "step": 20440 - }, - { - "epoch": 0.18078466733853144, - "grad_norm": 9.377950668334961, - "learning_rate": 4.698692221102448e-05, - "loss": 0.8051, - "step": 20450 - }, - { - "epoch": 0.1808730705988437, - "grad_norm": 3.125303268432617, - "learning_rate": 4.698544882335261e-05, - "loss": 0.8558, - "step": 20460 - }, - { - "epoch": 0.18096147385915592, - "grad_norm": 1.9119974374771118, - "learning_rate": 4.6983975435680735e-05, - "loss": 0.8276, - "step": 20470 - }, - { - "epoch": 0.18104987711946816, - "grad_norm": 6.218015670776367, - "learning_rate": 4.6982502048008864e-05, - "loss": 0.8171, - "step": 20480 - }, - { - "epoch": 0.18113828037978041, - "grad_norm": 6.455350875854492, - "learning_rate": 4.6981028660337e-05, - "loss": 0.833, - "step": 20490 - }, - { - "epoch": 0.18122668364009265, - "grad_norm": 8.490579605102539, - "learning_rate": 4.697955527266513e-05, - "loss": 0.8084, - "step": 20500 - }, - { - "epoch": 0.18131508690040488, - "grad_norm": 9.880264282226562, - "learning_rate": 4.6978081884993255e-05, - "loss": 0.7329, - "step": 20510 - }, - { - "epoch": 0.18140349016071713, - "grad_norm": 4.4080681800842285, - "learning_rate": 4.6976608497321384e-05, - "loss": 0.8693, - "step": 20520 - }, - { - "epoch": 0.18149189342102937, - "grad_norm": 6.001032829284668, - "learning_rate": 4.697513510964951e-05, - "loss": 0.6817, - "step": 20530 - }, - { - "epoch": 0.1815802966813416, - "grad_norm": 10.893025398254395, - "learning_rate": 4.697366172197764e-05, - "loss": 0.7788, - "step": 20540 - }, - { - "epoch": 0.18166869994165386, - "grad_norm": 6.218920707702637, - "learning_rate": 4.6972188334305776e-05, - "loss": 0.7104, - "step": 20550 - }, - { - "epoch": 0.18175710320196609, - "grad_norm": 3.348966598510742, - "learning_rate": 4.69707149466339e-05, - "loss": 0.8124, - "step": 20560 - }, - { - "epoch": 0.18184550646227832, - "grad_norm": 4.686115741729736, - "learning_rate": 4.696924155896203e-05, - "loss": 0.7374, - "step": 20570 - }, - { - "epoch": 0.18193390972259058, - "grad_norm": 5.5031609535217285, - "learning_rate": 4.696776817129016e-05, - "loss": 0.8236, - "step": 20580 - }, - { - "epoch": 0.1820223129829028, - "grad_norm": 7.1041975021362305, - "learning_rate": 4.696629478361829e-05, - "loss": 0.7116, - "step": 20590 - }, - { - "epoch": 0.18211071624321504, - "grad_norm": 3.8152554035186768, - "learning_rate": 4.696482139594642e-05, - "loss": 0.7432, - "step": 20600 - }, - { - "epoch": 0.1821991195035273, - "grad_norm": 2.5621113777160645, - "learning_rate": 4.696334800827455e-05, - "loss": 0.7318, - "step": 20610 - }, - { - "epoch": 0.18228752276383953, - "grad_norm": 11.487798690795898, - "learning_rate": 4.6961874620602674e-05, - "loss": 0.8046, - "step": 20620 - }, - { - "epoch": 0.18237592602415176, - "grad_norm": 6.597385406494141, - "learning_rate": 4.696040123293081e-05, - "loss": 0.7962, - "step": 20630 - }, - { - "epoch": 0.18246432928446402, - "grad_norm": 5.777371883392334, - "learning_rate": 4.695892784525894e-05, - "loss": 0.8109, - "step": 20640 - }, - { - "epoch": 0.18255273254477625, - "grad_norm": 4.814777851104736, - "learning_rate": 4.6957454457587066e-05, - "loss": 0.766, - "step": 20650 - }, - { - "epoch": 0.18264113580508848, - "grad_norm": 9.146038055419922, - "learning_rate": 4.6955981069915194e-05, - "loss": 0.8558, - "step": 20660 - }, - { - "epoch": 0.18272953906540074, - "grad_norm": 11.374016761779785, - "learning_rate": 4.695450768224333e-05, - "loss": 0.7406, - "step": 20670 - }, - { - "epoch": 0.18281794232571297, - "grad_norm": 2.69401478767395, - "learning_rate": 4.695303429457145e-05, - "loss": 0.7204, - "step": 20680 - }, - { - "epoch": 0.18290634558602523, - "grad_norm": 6.359562397003174, - "learning_rate": 4.6951560906899586e-05, - "loss": 0.7661, - "step": 20690 - }, - { - "epoch": 0.18299474884633746, - "grad_norm": 2.5801827907562256, - "learning_rate": 4.695008751922771e-05, - "loss": 0.704, - "step": 20700 - }, - { - "epoch": 0.1830831521066497, - "grad_norm": 2.6450891494750977, - "learning_rate": 4.694861413155584e-05, - "loss": 0.8052, - "step": 20710 - }, - { - "epoch": 0.18317155536696195, - "grad_norm": 3.069169282913208, - "learning_rate": 4.694714074388397e-05, - "loss": 0.8935, - "step": 20720 - }, - { - "epoch": 0.18325995862727418, - "grad_norm": 2.230119228363037, - "learning_rate": 4.69456673562121e-05, - "loss": 0.8753, - "step": 20730 - }, - { - "epoch": 0.1833483618875864, - "grad_norm": 6.34372615814209, - "learning_rate": 4.694419396854023e-05, - "loss": 0.6104, - "step": 20740 - }, - { - "epoch": 0.18343676514789867, - "grad_norm": 11.162723541259766, - "learning_rate": 4.694272058086836e-05, - "loss": 0.856, - "step": 20750 - }, - { - "epoch": 0.1835251684082109, - "grad_norm": 5.337286949157715, - "learning_rate": 4.6941247193196485e-05, - "loss": 0.7558, - "step": 20760 - }, - { - "epoch": 0.18361357166852313, - "grad_norm": 2.351268768310547, - "learning_rate": 4.693977380552462e-05, - "loss": 0.7964, - "step": 20770 - }, - { - "epoch": 0.1837019749288354, - "grad_norm": 11.143387794494629, - "learning_rate": 4.693830041785275e-05, - "loss": 0.7412, - "step": 20780 - }, - { - "epoch": 0.18379037818914762, - "grad_norm": 10.03717041015625, - "learning_rate": 4.6936827030180876e-05, - "loss": 0.7689, - "step": 20790 - }, - { - "epoch": 0.18387878144945985, - "grad_norm": 2.3697686195373535, - "learning_rate": 4.6935353642509005e-05, - "loss": 0.7574, - "step": 20800 - }, - { - "epoch": 0.1839671847097721, - "grad_norm": 4.796818733215332, - "learning_rate": 4.693388025483713e-05, - "loss": 0.7846, - "step": 20810 - }, - { - "epoch": 0.18405558797008434, - "grad_norm": 8.595834732055664, - "learning_rate": 4.693240686716526e-05, - "loss": 0.7348, - "step": 20820 - }, - { - "epoch": 0.18414399123039657, - "grad_norm": 5.621718883514404, - "learning_rate": 4.6930933479493397e-05, - "loss": 0.8067, - "step": 20830 - }, - { - "epoch": 0.18423239449070883, - "grad_norm": 5.930288791656494, - "learning_rate": 4.692946009182152e-05, - "loss": 0.788, - "step": 20840 - }, - { - "epoch": 0.18432079775102106, - "grad_norm": 3.473284959793091, - "learning_rate": 4.692798670414965e-05, - "loss": 0.8705, - "step": 20850 - }, - { - "epoch": 0.1844092010113333, - "grad_norm": 2.478281259536743, - "learning_rate": 4.692651331647778e-05, - "loss": 0.8396, - "step": 20860 - }, - { - "epoch": 0.18449760427164555, - "grad_norm": 8.749351501464844, - "learning_rate": 4.692503992880591e-05, - "loss": 0.9392, - "step": 20870 - }, - { - "epoch": 0.18458600753195778, - "grad_norm": 8.984127044677734, - "learning_rate": 4.692356654113404e-05, - "loss": 0.7349, - "step": 20880 - }, - { - "epoch": 0.18467441079227, - "grad_norm": 5.528654098510742, - "learning_rate": 4.6922093153462173e-05, - "loss": 0.872, - "step": 20890 - }, - { - "epoch": 0.18476281405258227, - "grad_norm": 6.0602898597717285, - "learning_rate": 4.6920619765790295e-05, - "loss": 0.6958, - "step": 20900 - }, - { - "epoch": 0.1848512173128945, - "grad_norm": 3.6754910945892334, - "learning_rate": 4.691914637811843e-05, - "loss": 0.7429, - "step": 20910 - }, - { - "epoch": 0.18493962057320673, - "grad_norm": 2.2601325511932373, - "learning_rate": 4.691767299044655e-05, - "loss": 0.6988, - "step": 20920 - }, - { - "epoch": 0.185028023833519, - "grad_norm": 5.051455020904541, - "learning_rate": 4.691619960277469e-05, - "loss": 0.7465, - "step": 20930 - }, - { - "epoch": 0.18511642709383122, - "grad_norm": 11.01762580871582, - "learning_rate": 4.6914726215102815e-05, - "loss": 0.9008, - "step": 20940 - }, - { - "epoch": 0.18520483035414345, - "grad_norm": 5.050708770751953, - "learning_rate": 4.6913252827430944e-05, - "loss": 0.8705, - "step": 20950 - }, - { - "epoch": 0.1852932336144557, - "grad_norm": 10.381126403808594, - "learning_rate": 4.691177943975907e-05, - "loss": 0.781, - "step": 20960 - }, - { - "epoch": 0.18538163687476794, - "grad_norm": 10.907230377197266, - "learning_rate": 4.691030605208721e-05, - "loss": 0.8314, - "step": 20970 - }, - { - "epoch": 0.18547004013508017, - "grad_norm": 1.5914275646209717, - "learning_rate": 4.690883266441533e-05, - "loss": 0.9625, - "step": 20980 - }, - { - "epoch": 0.18555844339539243, - "grad_norm": 6.222545146942139, - "learning_rate": 4.6907359276743464e-05, - "loss": 0.8171, - "step": 20990 - }, - { - "epoch": 0.18564684665570466, - "grad_norm": 7.4232258796691895, - "learning_rate": 4.690588588907159e-05, - "loss": 0.8049, - "step": 21000 - }, - { - "epoch": 0.1857352499160169, - "grad_norm": 4.576233863830566, - "learning_rate": 4.690441250139972e-05, - "loss": 0.8539, - "step": 21010 - }, - { - "epoch": 0.18582365317632915, - "grad_norm": 2.3963844776153564, - "learning_rate": 4.690293911372785e-05, - "loss": 0.7495, - "step": 21020 - }, - { - "epoch": 0.18591205643664138, - "grad_norm": 6.205860614776611, - "learning_rate": 4.690146572605598e-05, - "loss": 0.7821, - "step": 21030 - }, - { - "epoch": 0.1860004596969536, - "grad_norm": 2.3440845012664795, - "learning_rate": 4.6899992338384106e-05, - "loss": 0.7759, - "step": 21040 - }, - { - "epoch": 0.18608886295726587, - "grad_norm": 2.135939121246338, - "learning_rate": 4.689851895071224e-05, - "loss": 1.0169, - "step": 21050 - }, - { - "epoch": 0.1861772662175781, - "grad_norm": 4.532798767089844, - "learning_rate": 4.689704556304036e-05, - "loss": 0.8234, - "step": 21060 - }, - { - "epoch": 0.18626566947789033, - "grad_norm": 2.2755768299102783, - "learning_rate": 4.68955721753685e-05, - "loss": 0.8013, - "step": 21070 - }, - { - "epoch": 0.1863540727382026, - "grad_norm": 6.704551696777344, - "learning_rate": 4.6894098787696626e-05, - "loss": 0.9893, - "step": 21080 - }, - { - "epoch": 0.18644247599851482, - "grad_norm": 5.669991970062256, - "learning_rate": 4.6892625400024754e-05, - "loss": 0.7986, - "step": 21090 - }, - { - "epoch": 0.18653087925882705, - "grad_norm": 5.91778039932251, - "learning_rate": 4.689115201235288e-05, - "loss": 0.7941, - "step": 21100 - }, - { - "epoch": 0.1866192825191393, - "grad_norm": 2.228177309036255, - "learning_rate": 4.688967862468102e-05, - "loss": 0.841, - "step": 21110 - }, - { - "epoch": 0.18670768577945154, - "grad_norm": 9.556169509887695, - "learning_rate": 4.688820523700914e-05, - "loss": 0.9429, - "step": 21120 - }, - { - "epoch": 0.18679608903976377, - "grad_norm": 3.492906093597412, - "learning_rate": 4.6886731849337274e-05, - "loss": 0.876, - "step": 21130 - }, - { - "epoch": 0.18688449230007603, - "grad_norm": 6.84982442855835, - "learning_rate": 4.68852584616654e-05, - "loss": 0.7982, - "step": 21140 - }, - { - "epoch": 0.18697289556038826, - "grad_norm": 4.788298606872559, - "learning_rate": 4.688378507399353e-05, - "loss": 0.8319, - "step": 21150 - }, - { - "epoch": 0.1870612988207005, - "grad_norm": 7.109243392944336, - "learning_rate": 4.688231168632166e-05, - "loss": 0.8333, - "step": 21160 - }, - { - "epoch": 0.18714970208101275, - "grad_norm": 3.8994028568267822, - "learning_rate": 4.688083829864979e-05, - "loss": 0.8284, - "step": 21170 - }, - { - "epoch": 0.18723810534132498, - "grad_norm": 4.735000133514404, - "learning_rate": 4.6879364910977916e-05, - "loss": 0.7821, - "step": 21180 - }, - { - "epoch": 0.18732650860163721, - "grad_norm": 2.890619993209839, - "learning_rate": 4.687789152330605e-05, - "loss": 0.7143, - "step": 21190 - }, - { - "epoch": 0.18741491186194947, - "grad_norm": 2.096660852432251, - "learning_rate": 4.687641813563417e-05, - "loss": 0.7704, - "step": 21200 - }, - { - "epoch": 0.1875033151222617, - "grad_norm": 5.878719806671143, - "learning_rate": 4.687494474796231e-05, - "loss": 0.8685, - "step": 21210 - }, - { - "epoch": 0.18759171838257396, - "grad_norm": 10.67667293548584, - "learning_rate": 4.6873471360290436e-05, - "loss": 0.772, - "step": 21220 - }, - { - "epoch": 0.1876801216428862, - "grad_norm": 3.8728549480438232, - "learning_rate": 4.6871997972618565e-05, - "loss": 0.6603, - "step": 21230 - }, - { - "epoch": 0.18776852490319842, - "grad_norm": 3.7111921310424805, - "learning_rate": 4.687052458494669e-05, - "loss": 0.8378, - "step": 21240 - }, - { - "epoch": 0.18785692816351068, - "grad_norm": 6.716635704040527, - "learning_rate": 4.686905119727483e-05, - "loss": 0.7352, - "step": 21250 - }, - { - "epoch": 0.18794533142382291, - "grad_norm": 6.060492038726807, - "learning_rate": 4.686757780960295e-05, - "loss": 0.7173, - "step": 21260 - }, - { - "epoch": 0.18803373468413515, - "grad_norm": 4.747287273406982, - "learning_rate": 4.6866104421931085e-05, - "loss": 0.8773, - "step": 21270 - }, - { - "epoch": 0.1881221379444474, - "grad_norm": 6.806166648864746, - "learning_rate": 4.686463103425921e-05, - "loss": 0.7035, - "step": 21280 - }, - { - "epoch": 0.18821054120475963, - "grad_norm": 2.809680938720703, - "learning_rate": 4.686315764658734e-05, - "loss": 0.6529, - "step": 21290 - }, - { - "epoch": 0.18829894446507187, - "grad_norm": 4.832018852233887, - "learning_rate": 4.686168425891547e-05, - "loss": 0.8729, - "step": 21300 - }, - { - "epoch": 0.18838734772538412, - "grad_norm": 8.115225791931152, - "learning_rate": 4.68602108712436e-05, - "loss": 0.8716, - "step": 21310 - }, - { - "epoch": 0.18847575098569636, - "grad_norm": 4.351589679718018, - "learning_rate": 4.6858737483571727e-05, - "loss": 0.8144, - "step": 21320 - }, - { - "epoch": 0.1885641542460086, - "grad_norm": 5.018931865692139, - "learning_rate": 4.685726409589986e-05, - "loss": 0.7823, - "step": 21330 - }, - { - "epoch": 0.18865255750632084, - "grad_norm": 3.043612003326416, - "learning_rate": 4.685579070822799e-05, - "loss": 0.7336, - "step": 21340 - }, - { - "epoch": 0.18874096076663308, - "grad_norm": 7.226938247680664, - "learning_rate": 4.685431732055612e-05, - "loss": 0.8222, - "step": 21350 - }, - { - "epoch": 0.1888293640269453, - "grad_norm": 5.591948509216309, - "learning_rate": 4.685284393288425e-05, - "loss": 0.8815, - "step": 21360 - }, - { - "epoch": 0.18891776728725757, - "grad_norm": 3.7986953258514404, - "learning_rate": 4.6851370545212375e-05, - "loss": 0.8976, - "step": 21370 - }, - { - "epoch": 0.1890061705475698, - "grad_norm": 6.667764186859131, - "learning_rate": 4.6849897157540503e-05, - "loss": 0.7915, - "step": 21380 - }, - { - "epoch": 0.18909457380788203, - "grad_norm": 3.3211944103240967, - "learning_rate": 4.684842376986863e-05, - "loss": 0.7493, - "step": 21390 - }, - { - "epoch": 0.18918297706819429, - "grad_norm": 8.289932250976562, - "learning_rate": 4.684695038219677e-05, - "loss": 0.7112, - "step": 21400 - }, - { - "epoch": 0.18927138032850652, - "grad_norm": 8.218725204467773, - "learning_rate": 4.6845476994524895e-05, - "loss": 0.7393, - "step": 21410 - }, - { - "epoch": 0.18935978358881875, - "grad_norm": 3.564985990524292, - "learning_rate": 4.6844003606853024e-05, - "loss": 0.8505, - "step": 21420 - }, - { - "epoch": 0.189448186849131, - "grad_norm": 3.3935065269470215, - "learning_rate": 4.684253021918115e-05, - "loss": 0.7197, - "step": 21430 - }, - { - "epoch": 0.18953659010944324, - "grad_norm": 7.465757369995117, - "learning_rate": 4.684105683150928e-05, - "loss": 0.6755, - "step": 21440 - }, - { - "epoch": 0.18962499336975547, - "grad_norm": 9.55097770690918, - "learning_rate": 4.683958344383741e-05, - "loss": 0.8269, - "step": 21450 - }, - { - "epoch": 0.18971339663006773, - "grad_norm": 11.654396057128906, - "learning_rate": 4.6838110056165544e-05, - "loss": 0.7776, - "step": 21460 - }, - { - "epoch": 0.18980179989037996, - "grad_norm": 3.557164192199707, - "learning_rate": 4.683663666849367e-05, - "loss": 0.8328, - "step": 21470 - }, - { - "epoch": 0.1898902031506922, - "grad_norm": 2.5593674182891846, - "learning_rate": 4.68351632808218e-05, - "loss": 0.7031, - "step": 21480 - }, - { - "epoch": 0.18997860641100445, - "grad_norm": 4.95280647277832, - "learning_rate": 4.683368989314993e-05, - "loss": 0.8917, - "step": 21490 - }, - { - "epoch": 0.19006700967131668, - "grad_norm": 6.238870620727539, - "learning_rate": 4.683221650547806e-05, - "loss": 0.8169, - "step": 21500 - }, - { - "epoch": 0.1901554129316289, - "grad_norm": 4.238143444061279, - "learning_rate": 4.6830743117806186e-05, - "loss": 0.6997, - "step": 21510 - }, - { - "epoch": 0.19024381619194117, - "grad_norm": 4.553244590759277, - "learning_rate": 4.682926973013432e-05, - "loss": 0.8714, - "step": 21520 - }, - { - "epoch": 0.1903322194522534, - "grad_norm": 4.513711929321289, - "learning_rate": 4.682779634246244e-05, - "loss": 0.6667, - "step": 21530 - }, - { - "epoch": 0.19042062271256563, - "grad_norm": 8.019719123840332, - "learning_rate": 4.682632295479058e-05, - "loss": 0.7857, - "step": 21540 - }, - { - "epoch": 0.1905090259728779, - "grad_norm": 12.806079864501953, - "learning_rate": 4.6824849567118706e-05, - "loss": 0.8822, - "step": 21550 - }, - { - "epoch": 0.19059742923319012, - "grad_norm": 5.0920538902282715, - "learning_rate": 4.6823376179446834e-05, - "loss": 0.7505, - "step": 21560 - }, - { - "epoch": 0.19068583249350235, - "grad_norm": 16.16451644897461, - "learning_rate": 4.682190279177496e-05, - "loss": 0.7657, - "step": 21570 - }, - { - "epoch": 0.1907742357538146, - "grad_norm": 2.1458187103271484, - "learning_rate": 4.68204294041031e-05, - "loss": 0.696, - "step": 21580 - }, - { - "epoch": 0.19086263901412684, - "grad_norm": 3.2431187629699707, - "learning_rate": 4.681895601643122e-05, - "loss": 0.9059, - "step": 21590 - }, - { - "epoch": 0.19095104227443907, - "grad_norm": 6.888151168823242, - "learning_rate": 4.6817482628759354e-05, - "loss": 0.8723, - "step": 21600 - }, - { - "epoch": 0.19103944553475133, - "grad_norm": 4.5483622550964355, - "learning_rate": 4.681600924108748e-05, - "loss": 0.7336, - "step": 21610 - }, - { - "epoch": 0.19112784879506356, - "grad_norm": 8.848033905029297, - "learning_rate": 4.681453585341561e-05, - "loss": 0.8398, - "step": 21620 - }, - { - "epoch": 0.1912162520553758, - "grad_norm": 2.365640640258789, - "learning_rate": 4.681306246574374e-05, - "loss": 0.7033, - "step": 21630 - }, - { - "epoch": 0.19130465531568805, - "grad_norm": 4.853655815124512, - "learning_rate": 4.681158907807187e-05, - "loss": 0.8226, - "step": 21640 - }, - { - "epoch": 0.19139305857600028, - "grad_norm": 7.536863803863525, - "learning_rate": 4.6810115690399996e-05, - "loss": 0.8291, - "step": 21650 - }, - { - "epoch": 0.1914814618363125, - "grad_norm": 5.142647743225098, - "learning_rate": 4.680864230272813e-05, - "loss": 0.7114, - "step": 21660 - }, - { - "epoch": 0.19156986509662477, - "grad_norm": 7.097962856292725, - "learning_rate": 4.680716891505625e-05, - "loss": 0.7708, - "step": 21670 - }, - { - "epoch": 0.191658268356937, - "grad_norm": 10.546960830688477, - "learning_rate": 4.680569552738439e-05, - "loss": 0.7818, - "step": 21680 - }, - { - "epoch": 0.19174667161724923, - "grad_norm": 5.610840797424316, - "learning_rate": 4.6804222139712516e-05, - "loss": 0.8427, - "step": 21690 - }, - { - "epoch": 0.1918350748775615, - "grad_norm": 4.584107398986816, - "learning_rate": 4.6802748752040645e-05, - "loss": 0.7315, - "step": 21700 - }, - { - "epoch": 0.19192347813787372, - "grad_norm": 5.1670660972595215, - "learning_rate": 4.680127536436877e-05, - "loss": 0.7063, - "step": 21710 - }, - { - "epoch": 0.19201188139818595, - "grad_norm": 4.768735885620117, - "learning_rate": 4.679980197669691e-05, - "loss": 0.6673, - "step": 21720 - }, - { - "epoch": 0.1921002846584982, - "grad_norm": 5.147122859954834, - "learning_rate": 4.679832858902503e-05, - "loss": 0.7206, - "step": 21730 - }, - { - "epoch": 0.19218868791881044, - "grad_norm": 4.420533180236816, - "learning_rate": 4.6796855201353165e-05, - "loss": 0.8599, - "step": 21740 - }, - { - "epoch": 0.19227709117912267, - "grad_norm": 4.13287878036499, - "learning_rate": 4.6795381813681286e-05, - "loss": 0.7373, - "step": 21750 - }, - { - "epoch": 0.19236549443943493, - "grad_norm": 9.321707725524902, - "learning_rate": 4.679390842600942e-05, - "loss": 0.8204, - "step": 21760 - }, - { - "epoch": 0.19245389769974716, - "grad_norm": 5.07951545715332, - "learning_rate": 4.679243503833755e-05, - "loss": 0.7072, - "step": 21770 - }, - { - "epoch": 0.19254230096005942, - "grad_norm": 2.755408763885498, - "learning_rate": 4.679096165066568e-05, - "loss": 0.7929, - "step": 21780 - }, - { - "epoch": 0.19263070422037165, - "grad_norm": 3.890803098678589, - "learning_rate": 4.678948826299381e-05, - "loss": 0.8066, - "step": 21790 - }, - { - "epoch": 0.19271910748068388, - "grad_norm": 14.816995620727539, - "learning_rate": 4.678801487532194e-05, - "loss": 0.7702, - "step": 21800 - }, - { - "epoch": 0.19280751074099614, - "grad_norm": 1.9433757066726685, - "learning_rate": 4.678654148765006e-05, - "loss": 0.7438, - "step": 21810 - }, - { - "epoch": 0.19289591400130837, - "grad_norm": 12.897647857666016, - "learning_rate": 4.67850680999782e-05, - "loss": 0.7654, - "step": 21820 - }, - { - "epoch": 0.1929843172616206, - "grad_norm": 5.284687519073486, - "learning_rate": 4.678359471230633e-05, - "loss": 0.8564, - "step": 21830 - }, - { - "epoch": 0.19307272052193286, - "grad_norm": 3.4131147861480713, - "learning_rate": 4.6782121324634455e-05, - "loss": 0.842, - "step": 21840 - }, - { - "epoch": 0.1931611237822451, - "grad_norm": 8.69863224029541, - "learning_rate": 4.6780647936962584e-05, - "loss": 0.9538, - "step": 21850 - }, - { - "epoch": 0.19324952704255732, - "grad_norm": 5.734713077545166, - "learning_rate": 4.677917454929071e-05, - "loss": 0.7811, - "step": 21860 - }, - { - "epoch": 0.19333793030286958, - "grad_norm": 3.2469704151153564, - "learning_rate": 4.677770116161884e-05, - "loss": 0.8141, - "step": 21870 - }, - { - "epoch": 0.1934263335631818, - "grad_norm": 6.072801113128662, - "learning_rate": 4.6776227773946975e-05, - "loss": 0.7981, - "step": 21880 - }, - { - "epoch": 0.19351473682349404, - "grad_norm": 4.3431291580200195, - "learning_rate": 4.67747543862751e-05, - "loss": 0.8232, - "step": 21890 - }, - { - "epoch": 0.1936031400838063, - "grad_norm": 2.9932267665863037, - "learning_rate": 4.677328099860323e-05, - "loss": 0.844, - "step": 21900 - }, - { - "epoch": 0.19369154334411853, - "grad_norm": 7.422665596008301, - "learning_rate": 4.677180761093136e-05, - "loss": 0.8118, - "step": 21910 - }, - { - "epoch": 0.19377994660443076, - "grad_norm": 2.4402995109558105, - "learning_rate": 4.677033422325949e-05, - "loss": 0.8435, - "step": 21920 - }, - { - "epoch": 0.19386834986474302, - "grad_norm": 11.59831714630127, - "learning_rate": 4.676886083558762e-05, - "loss": 0.7656, - "step": 21930 - }, - { - "epoch": 0.19395675312505525, - "grad_norm": 4.198363304138184, - "learning_rate": 4.676738744791575e-05, - "loss": 0.8767, - "step": 21940 - }, - { - "epoch": 0.19404515638536748, - "grad_norm": 3.5214431285858154, - "learning_rate": 4.6765914060243874e-05, - "loss": 0.7284, - "step": 21950 - }, - { - "epoch": 0.19413355964567974, - "grad_norm": 2.6636786460876465, - "learning_rate": 4.676444067257201e-05, - "loss": 0.994, - "step": 21960 - }, - { - "epoch": 0.19422196290599197, - "grad_norm": 3.3864927291870117, - "learning_rate": 4.676296728490014e-05, - "loss": 0.6833, - "step": 21970 - }, - { - "epoch": 0.1943103661663042, - "grad_norm": 2.2272789478302, - "learning_rate": 4.6761493897228266e-05, - "loss": 0.7847, - "step": 21980 - }, - { - "epoch": 0.19439876942661646, - "grad_norm": 2.8747472763061523, - "learning_rate": 4.6760020509556394e-05, - "loss": 0.8957, - "step": 21990 - }, - { - "epoch": 0.1944871726869287, - "grad_norm": 2.239976406097412, - "learning_rate": 4.675854712188452e-05, - "loss": 0.7382, - "step": 22000 - }, - { - "epoch": 0.19457557594724093, - "grad_norm": 4.4402618408203125, - "learning_rate": 4.675707373421265e-05, - "loss": 0.8839, - "step": 22010 - }, - { - "epoch": 0.19466397920755318, - "grad_norm": 4.302387237548828, - "learning_rate": 4.6755600346540786e-05, - "loss": 0.8253, - "step": 22020 - }, - { - "epoch": 0.19475238246786541, - "grad_norm": 3.6364643573760986, - "learning_rate": 4.675412695886891e-05, - "loss": 0.7194, - "step": 22030 - }, - { - "epoch": 0.19484078572817765, - "grad_norm": 2.026207447052002, - "learning_rate": 4.675265357119704e-05, - "loss": 0.7733, - "step": 22040 - }, - { - "epoch": 0.1949291889884899, - "grad_norm": 11.127307891845703, - "learning_rate": 4.675118018352517e-05, - "loss": 0.8729, - "step": 22050 - }, - { - "epoch": 0.19501759224880214, - "grad_norm": 4.305873870849609, - "learning_rate": 4.67497067958533e-05, - "loss": 0.7843, - "step": 22060 - }, - { - "epoch": 0.19510599550911437, - "grad_norm": 2.9550535678863525, - "learning_rate": 4.674823340818143e-05, - "loss": 0.854, - "step": 22070 - }, - { - "epoch": 0.19519439876942662, - "grad_norm": 9.263606071472168, - "learning_rate": 4.674676002050956e-05, - "loss": 0.8685, - "step": 22080 - }, - { - "epoch": 0.19528280202973886, - "grad_norm": 4.128292083740234, - "learning_rate": 4.6745286632837684e-05, - "loss": 0.7077, - "step": 22090 - }, - { - "epoch": 0.1953712052900511, - "grad_norm": 3.258774995803833, - "learning_rate": 4.674381324516582e-05, - "loss": 0.74, - "step": 22100 - }, - { - "epoch": 0.19545960855036335, - "grad_norm": 6.328365325927734, - "learning_rate": 4.674233985749394e-05, - "loss": 0.8824, - "step": 22110 - }, - { - "epoch": 0.19554801181067558, - "grad_norm": 8.921202659606934, - "learning_rate": 4.6740866469822076e-05, - "loss": 0.7058, - "step": 22120 - }, - { - "epoch": 0.1956364150709878, - "grad_norm": 12.509286880493164, - "learning_rate": 4.6739393082150205e-05, - "loss": 0.8928, - "step": 22130 - }, - { - "epoch": 0.19572481833130007, - "grad_norm": 6.750686168670654, - "learning_rate": 4.673791969447833e-05, - "loss": 0.832, - "step": 22140 - }, - { - "epoch": 0.1958132215916123, - "grad_norm": 2.3646960258483887, - "learning_rate": 4.673644630680646e-05, - "loss": 0.8489, - "step": 22150 - }, - { - "epoch": 0.19590162485192453, - "grad_norm": 4.463263511657715, - "learning_rate": 4.6734972919134596e-05, - "loss": 0.7064, - "step": 22160 - }, - { - "epoch": 0.19599002811223679, - "grad_norm": 3.6031270027160645, - "learning_rate": 4.673349953146272e-05, - "loss": 0.7205, - "step": 22170 - }, - { - "epoch": 0.19607843137254902, - "grad_norm": 5.079982280731201, - "learning_rate": 4.673202614379085e-05, - "loss": 0.8516, - "step": 22180 - }, - { - "epoch": 0.19616683463286125, - "grad_norm": 2.213585376739502, - "learning_rate": 4.673055275611898e-05, - "loss": 0.8975, - "step": 22190 - }, - { - "epoch": 0.1962552378931735, - "grad_norm": 8.803163528442383, - "learning_rate": 4.672907936844711e-05, - "loss": 0.8135, - "step": 22200 - }, - { - "epoch": 0.19634364115348574, - "grad_norm": 9.55571460723877, - "learning_rate": 4.672760598077524e-05, - "loss": 0.7677, - "step": 22210 - }, - { - "epoch": 0.19643204441379797, - "grad_norm": 2.8771579265594482, - "learning_rate": 4.6726132593103367e-05, - "loss": 0.6235, - "step": 22220 - }, - { - "epoch": 0.19652044767411023, - "grad_norm": 13.60547924041748, - "learning_rate": 4.6724659205431495e-05, - "loss": 0.8516, - "step": 22230 - }, - { - "epoch": 0.19660885093442246, - "grad_norm": 8.28115463256836, - "learning_rate": 4.672318581775963e-05, - "loss": 0.8508, - "step": 22240 - }, - { - "epoch": 0.1966972541947347, - "grad_norm": 4.614414691925049, - "learning_rate": 4.672171243008776e-05, - "loss": 0.9152, - "step": 22250 - }, - { - "epoch": 0.19678565745504695, - "grad_norm": 4.92991304397583, - "learning_rate": 4.672023904241589e-05, - "loss": 0.8689, - "step": 22260 - }, - { - "epoch": 0.19687406071535918, - "grad_norm": 6.292807579040527, - "learning_rate": 4.6718765654744015e-05, - "loss": 0.7708, - "step": 22270 - }, - { - "epoch": 0.1969624639756714, - "grad_norm": 2.8370020389556885, - "learning_rate": 4.6717292267072143e-05, - "loss": 0.7834, - "step": 22280 - }, - { - "epoch": 0.19705086723598367, - "grad_norm": 4.450292587280273, - "learning_rate": 4.671581887940027e-05, - "loss": 0.7851, - "step": 22290 - }, - { - "epoch": 0.1971392704962959, - "grad_norm": 9.224190711975098, - "learning_rate": 4.671434549172841e-05, - "loss": 0.7873, - "step": 22300 - }, - { - "epoch": 0.19722767375660816, - "grad_norm": 8.472943305969238, - "learning_rate": 4.6712872104056535e-05, - "loss": 0.7665, - "step": 22310 - }, - { - "epoch": 0.1973160770169204, - "grad_norm": 5.1440863609313965, - "learning_rate": 4.6711398716384664e-05, - "loss": 0.8139, - "step": 22320 - }, - { - "epoch": 0.19740448027723262, - "grad_norm": 7.636616230010986, - "learning_rate": 4.670992532871279e-05, - "loss": 0.805, - "step": 22330 - }, - { - "epoch": 0.19749288353754488, - "grad_norm": 7.643059730529785, - "learning_rate": 4.670845194104092e-05, - "loss": 0.8944, - "step": 22340 - }, - { - "epoch": 0.1975812867978571, - "grad_norm": 5.8303632736206055, - "learning_rate": 4.670697855336905e-05, - "loss": 0.7028, - "step": 22350 - }, - { - "epoch": 0.19766969005816934, - "grad_norm": 2.1993534564971924, - "learning_rate": 4.670550516569718e-05, - "loss": 0.7117, - "step": 22360 - }, - { - "epoch": 0.1977580933184816, - "grad_norm": 4.56084680557251, - "learning_rate": 4.670403177802531e-05, - "loss": 0.7634, - "step": 22370 - }, - { - "epoch": 0.19784649657879383, - "grad_norm": 9.242644309997559, - "learning_rate": 4.670255839035344e-05, - "loss": 0.8332, - "step": 22380 - }, - { - "epoch": 0.19793489983910606, - "grad_norm": 15.057245254516602, - "learning_rate": 4.670108500268157e-05, - "loss": 0.7764, - "step": 22390 - }, - { - "epoch": 0.19802330309941832, - "grad_norm": 3.096505880355835, - "learning_rate": 4.66996116150097e-05, - "loss": 0.8239, - "step": 22400 - }, - { - "epoch": 0.19811170635973055, - "grad_norm": 4.721726417541504, - "learning_rate": 4.6698138227337826e-05, - "loss": 0.7958, - "step": 22410 - }, - { - "epoch": 0.19820010962004278, - "grad_norm": 3.4196979999542236, - "learning_rate": 4.6696664839665954e-05, - "loss": 0.8045, - "step": 22420 - }, - { - "epoch": 0.19828851288035504, - "grad_norm": 6.442022800445557, - "learning_rate": 4.669519145199409e-05, - "loss": 0.7235, - "step": 22430 - }, - { - "epoch": 0.19837691614066727, - "grad_norm": 7.930601119995117, - "learning_rate": 4.669371806432222e-05, - "loss": 0.8965, - "step": 22440 - }, - { - "epoch": 0.1984653194009795, - "grad_norm": 1.932212233543396, - "learning_rate": 4.6692244676650346e-05, - "loss": 0.8522, - "step": 22450 - }, - { - "epoch": 0.19855372266129176, - "grad_norm": 2.316455841064453, - "learning_rate": 4.6690771288978474e-05, - "loss": 0.7646, - "step": 22460 - }, - { - "epoch": 0.198642125921604, - "grad_norm": 5.144872188568115, - "learning_rate": 4.66892979013066e-05, - "loss": 0.7287, - "step": 22470 - }, - { - "epoch": 0.19873052918191622, - "grad_norm": 5.668541431427002, - "learning_rate": 4.668782451363473e-05, - "loss": 0.6577, - "step": 22480 - }, - { - "epoch": 0.19881893244222848, - "grad_norm": 6.585099697113037, - "learning_rate": 4.6686351125962866e-05, - "loss": 0.7031, - "step": 22490 - }, - { - "epoch": 0.1989073357025407, - "grad_norm": 5.952176094055176, - "learning_rate": 4.668487773829099e-05, - "loss": 0.7519, - "step": 22500 - }, - { - "epoch": 0.19899573896285294, - "grad_norm": 10.434608459472656, - "learning_rate": 4.668340435061912e-05, - "loss": 0.7314, - "step": 22510 - }, - { - "epoch": 0.1990841422231652, - "grad_norm": 6.423018932342529, - "learning_rate": 4.668193096294725e-05, - "loss": 0.7372, - "step": 22520 - }, - { - "epoch": 0.19917254548347743, - "grad_norm": 3.842513084411621, - "learning_rate": 4.668045757527538e-05, - "loss": 0.8626, - "step": 22530 - }, - { - "epoch": 0.19926094874378966, - "grad_norm": 4.58589506149292, - "learning_rate": 4.667898418760351e-05, - "loss": 0.9298, - "step": 22540 - }, - { - "epoch": 0.19934935200410192, - "grad_norm": 3.9880809783935547, - "learning_rate": 4.667751079993164e-05, - "loss": 0.7535, - "step": 22550 - }, - { - "epoch": 0.19943775526441415, - "grad_norm": 3.5416860580444336, - "learning_rate": 4.6676037412259764e-05, - "loss": 0.7448, - "step": 22560 - }, - { - "epoch": 0.19952615852472638, - "grad_norm": 6.561315059661865, - "learning_rate": 4.66745640245879e-05, - "loss": 0.9248, - "step": 22570 - }, - { - "epoch": 0.19961456178503864, - "grad_norm": 8.85240650177002, - "learning_rate": 4.667309063691602e-05, - "loss": 0.6894, - "step": 22580 - }, - { - "epoch": 0.19970296504535087, - "grad_norm": 4.638481140136719, - "learning_rate": 4.6671617249244156e-05, - "loss": 0.6975, - "step": 22590 - }, - { - "epoch": 0.1997913683056631, - "grad_norm": 9.74061107635498, - "learning_rate": 4.6670143861572285e-05, - "loss": 0.9096, - "step": 22600 - }, - { - "epoch": 0.19987977156597536, - "grad_norm": 7.308407783508301, - "learning_rate": 4.666867047390041e-05, - "loss": 0.8025, - "step": 22610 - }, - { - "epoch": 0.1999681748262876, - "grad_norm": 4.789428234100342, - "learning_rate": 4.666719708622854e-05, - "loss": 0.7437, - "step": 22620 - }, - { - "epoch": 0.20005657808659982, - "grad_norm": 4.0193190574646, - "learning_rate": 4.6665723698556676e-05, - "loss": 0.6633, - "step": 22630 - }, - { - "epoch": 0.20014498134691208, - "grad_norm": 11.492403030395508, - "learning_rate": 4.66642503108848e-05, - "loss": 0.8315, - "step": 22640 - }, - { - "epoch": 0.2002333846072243, - "grad_norm": 3.3232409954071045, - "learning_rate": 4.666277692321293e-05, - "loss": 0.7715, - "step": 22650 - }, - { - "epoch": 0.20032178786753654, - "grad_norm": 6.283932209014893, - "learning_rate": 4.666130353554106e-05, - "loss": 0.8237, - "step": 22660 - }, - { - "epoch": 0.2004101911278488, - "grad_norm": 8.97296142578125, - "learning_rate": 4.665983014786919e-05, - "loss": 0.8166, - "step": 22670 - }, - { - "epoch": 0.20049859438816103, - "grad_norm": 6.649289131164551, - "learning_rate": 4.665835676019732e-05, - "loss": 0.8446, - "step": 22680 - }, - { - "epoch": 0.20058699764847326, - "grad_norm": 6.020296573638916, - "learning_rate": 4.6656883372525447e-05, - "loss": 0.8312, - "step": 22690 - }, - { - "epoch": 0.20067540090878552, - "grad_norm": 9.102386474609375, - "learning_rate": 4.6655409984853575e-05, - "loss": 0.7975, - "step": 22700 - }, - { - "epoch": 0.20076380416909775, - "grad_norm": 3.375962972640991, - "learning_rate": 4.665393659718171e-05, - "loss": 0.8166, - "step": 22710 - }, - { - "epoch": 0.20085220742940998, - "grad_norm": 3.7020063400268555, - "learning_rate": 4.665246320950983e-05, - "loss": 0.8367, - "step": 22720 - }, - { - "epoch": 0.20094061068972224, - "grad_norm": 8.71592903137207, - "learning_rate": 4.665098982183797e-05, - "loss": 0.8129, - "step": 22730 - }, - { - "epoch": 0.20102901395003447, - "grad_norm": 3.620912790298462, - "learning_rate": 4.6649516434166095e-05, - "loss": 0.8335, - "step": 22740 - }, - { - "epoch": 0.2011174172103467, - "grad_norm": 2.114528179168701, - "learning_rate": 4.6648043046494223e-05, - "loss": 0.8284, - "step": 22750 - }, - { - "epoch": 0.20120582047065896, - "grad_norm": 7.276772975921631, - "learning_rate": 4.664656965882235e-05, - "loss": 0.7431, - "step": 22760 - }, - { - "epoch": 0.2012942237309712, - "grad_norm": 4.809281826019287, - "learning_rate": 4.664509627115049e-05, - "loss": 0.8252, - "step": 22770 - }, - { - "epoch": 0.20138262699128343, - "grad_norm": 2.5595180988311768, - "learning_rate": 4.664362288347861e-05, - "loss": 0.8273, - "step": 22780 - }, - { - "epoch": 0.20147103025159568, - "grad_norm": 7.62675666809082, - "learning_rate": 4.6642149495806744e-05, - "loss": 0.9131, - "step": 22790 - }, - { - "epoch": 0.20155943351190792, - "grad_norm": 5.095913410186768, - "learning_rate": 4.6640676108134865e-05, - "loss": 0.7483, - "step": 22800 - }, - { - "epoch": 0.20164783677222015, - "grad_norm": 4.876876354217529, - "learning_rate": 4.6639202720463e-05, - "loss": 0.7971, - "step": 22810 - }, - { - "epoch": 0.2017362400325324, - "grad_norm": 3.420225143432617, - "learning_rate": 4.663772933279113e-05, - "loss": 0.7375, - "step": 22820 - }, - { - "epoch": 0.20182464329284464, - "grad_norm": 5.955178260803223, - "learning_rate": 4.663625594511926e-05, - "loss": 0.6665, - "step": 22830 - }, - { - "epoch": 0.2019130465531569, - "grad_norm": 5.702826976776123, - "learning_rate": 4.6634782557447385e-05, - "loss": 0.8483, - "step": 22840 - }, - { - "epoch": 0.20200144981346912, - "grad_norm": 4.032488822937012, - "learning_rate": 4.663330916977552e-05, - "loss": 0.7132, - "step": 22850 - }, - { - "epoch": 0.20208985307378136, - "grad_norm": 3.2358500957489014, - "learning_rate": 4.663183578210364e-05, - "loss": 0.8269, - "step": 22860 - }, - { - "epoch": 0.20217825633409361, - "grad_norm": 8.215649604797363, - "learning_rate": 4.663036239443178e-05, - "loss": 0.8625, - "step": 22870 - }, - { - "epoch": 0.20226665959440585, - "grad_norm": 2.6914680004119873, - "learning_rate": 4.6628889006759906e-05, - "loss": 0.7684, - "step": 22880 - }, - { - "epoch": 0.20235506285471808, - "grad_norm": 5.066773891448975, - "learning_rate": 4.6627415619088034e-05, - "loss": 0.8112, - "step": 22890 - }, - { - "epoch": 0.20244346611503033, - "grad_norm": 9.938232421875, - "learning_rate": 4.662594223141616e-05, - "loss": 0.7483, - "step": 22900 - }, - { - "epoch": 0.20253186937534257, - "grad_norm": 9.867469787597656, - "learning_rate": 4.66244688437443e-05, - "loss": 0.8257, - "step": 22910 - }, - { - "epoch": 0.2026202726356548, - "grad_norm": 2.619184970855713, - "learning_rate": 4.662299545607242e-05, - "loss": 0.74, - "step": 22920 - }, - { - "epoch": 0.20270867589596706, - "grad_norm": 7.602010250091553, - "learning_rate": 4.6621522068400554e-05, - "loss": 0.8422, - "step": 22930 - }, - { - "epoch": 0.2027970791562793, - "grad_norm": 4.414831161499023, - "learning_rate": 4.6620048680728676e-05, - "loss": 0.7838, - "step": 22940 - }, - { - "epoch": 0.20288548241659152, - "grad_norm": 4.029686450958252, - "learning_rate": 4.661857529305681e-05, - "loss": 0.783, - "step": 22950 - }, - { - "epoch": 0.20297388567690378, - "grad_norm": 2.539350986480713, - "learning_rate": 4.661710190538494e-05, - "loss": 0.8021, - "step": 22960 - }, - { - "epoch": 0.203062288937216, - "grad_norm": 4.161380767822266, - "learning_rate": 4.661562851771307e-05, - "loss": 0.8077, - "step": 22970 - }, - { - "epoch": 0.20315069219752824, - "grad_norm": 3.5896804332733154, - "learning_rate": 4.6614155130041196e-05, - "loss": 0.8087, - "step": 22980 - }, - { - "epoch": 0.2032390954578405, - "grad_norm": 2.0759224891662598, - "learning_rate": 4.661268174236933e-05, - "loss": 0.8295, - "step": 22990 - }, - { - "epoch": 0.20332749871815273, - "grad_norm": 3.9206223487854004, - "learning_rate": 4.661120835469745e-05, - "loss": 0.8418, - "step": 23000 - }, - { - "epoch": 0.20341590197846496, - "grad_norm": 5.133076190948486, - "learning_rate": 4.660973496702559e-05, - "loss": 0.8928, - "step": 23010 - }, - { - "epoch": 0.20350430523877722, - "grad_norm": 3.193308115005493, - "learning_rate": 4.6608261579353716e-05, - "loss": 0.8147, - "step": 23020 - }, - { - "epoch": 0.20359270849908945, - "grad_norm": 4.302774906158447, - "learning_rate": 4.6606788191681845e-05, - "loss": 0.7561, - "step": 23030 - }, - { - "epoch": 0.20368111175940168, - "grad_norm": 3.4367735385894775, - "learning_rate": 4.660531480400997e-05, - "loss": 0.7127, - "step": 23040 - }, - { - "epoch": 0.20376951501971394, - "grad_norm": 6.8108015060424805, - "learning_rate": 4.66038414163381e-05, - "loss": 0.7675, - "step": 23050 - }, - { - "epoch": 0.20385791828002617, - "grad_norm": 9.134854316711426, - "learning_rate": 4.660236802866623e-05, - "loss": 0.8798, - "step": 23060 - }, - { - "epoch": 0.2039463215403384, - "grad_norm": 2.6153030395507812, - "learning_rate": 4.6600894640994365e-05, - "loss": 0.8223, - "step": 23070 - }, - { - "epoch": 0.20403472480065066, - "grad_norm": 5.8745903968811035, - "learning_rate": 4.6599421253322486e-05, - "loss": 0.8014, - "step": 23080 - }, - { - "epoch": 0.2041231280609629, - "grad_norm": 4.749171257019043, - "learning_rate": 4.659794786565062e-05, - "loss": 0.7336, - "step": 23090 - }, - { - "epoch": 0.20421153132127512, - "grad_norm": 3.4737186431884766, - "learning_rate": 4.659647447797875e-05, - "loss": 0.7971, - "step": 23100 - }, - { - "epoch": 0.20429993458158738, - "grad_norm": 3.0345163345336914, - "learning_rate": 4.659500109030688e-05, - "loss": 0.7936, - "step": 23110 - }, - { - "epoch": 0.2043883378418996, - "grad_norm": 9.059696197509766, - "learning_rate": 4.6593527702635006e-05, - "loss": 0.7744, - "step": 23120 - }, - { - "epoch": 0.20447674110221184, - "grad_norm": 4.60284423828125, - "learning_rate": 4.659205431496314e-05, - "loss": 0.8543, - "step": 23130 - }, - { - "epoch": 0.2045651443625241, - "grad_norm": 5.638271808624268, - "learning_rate": 4.659058092729126e-05, - "loss": 0.8235, - "step": 23140 - }, - { - "epoch": 0.20465354762283633, - "grad_norm": 9.336036682128906, - "learning_rate": 4.65891075396194e-05, - "loss": 0.8429, - "step": 23150 - }, - { - "epoch": 0.20474195088314856, - "grad_norm": 3.498440980911255, - "learning_rate": 4.658763415194753e-05, - "loss": 0.8518, - "step": 23160 - }, - { - "epoch": 0.20483035414346082, - "grad_norm": 4.826033115386963, - "learning_rate": 4.6586160764275655e-05, - "loss": 0.8763, - "step": 23170 - }, - { - "epoch": 0.20491875740377305, - "grad_norm": 7.477751731872559, - "learning_rate": 4.658468737660378e-05, - "loss": 0.7637, - "step": 23180 - }, - { - "epoch": 0.20500716066408528, - "grad_norm": 8.1306791305542, - "learning_rate": 4.658321398893191e-05, - "loss": 0.6267, - "step": 23190 - }, - { - "epoch": 0.20509556392439754, - "grad_norm": 4.623834609985352, - "learning_rate": 4.658174060126004e-05, - "loss": 0.8497, - "step": 23200 - }, - { - "epoch": 0.20518396718470977, - "grad_norm": 4.3841166496276855, - "learning_rate": 4.6580267213588175e-05, - "loss": 0.6705, - "step": 23210 - }, - { - "epoch": 0.205272370445022, - "grad_norm": 3.9328527450561523, - "learning_rate": 4.6578793825916304e-05, - "loss": 0.7929, - "step": 23220 - }, - { - "epoch": 0.20536077370533426, - "grad_norm": 3.7214295864105225, - "learning_rate": 4.657732043824443e-05, - "loss": 0.7466, - "step": 23230 - }, - { - "epoch": 0.2054491769656465, - "grad_norm": 2.744231939315796, - "learning_rate": 4.657584705057256e-05, - "loss": 0.8482, - "step": 23240 - }, - { - "epoch": 0.20553758022595872, - "grad_norm": 7.006786823272705, - "learning_rate": 4.657437366290069e-05, - "loss": 0.7837, - "step": 23250 - }, - { - "epoch": 0.20562598348627098, - "grad_norm": 5.491894721984863, - "learning_rate": 4.657290027522882e-05, - "loss": 0.746, - "step": 23260 - }, - { - "epoch": 0.2057143867465832, - "grad_norm": 6.551908016204834, - "learning_rate": 4.6571426887556945e-05, - "loss": 0.7904, - "step": 23270 - }, - { - "epoch": 0.20580279000689544, - "grad_norm": 4.034269332885742, - "learning_rate": 4.656995349988508e-05, - "loss": 0.8047, - "step": 23280 - }, - { - "epoch": 0.2058911932672077, - "grad_norm": 2.973371982574463, - "learning_rate": 4.656848011221321e-05, - "loss": 0.8121, - "step": 23290 - }, - { - "epoch": 0.20597959652751993, - "grad_norm": 2.032548666000366, - "learning_rate": 4.656700672454134e-05, - "loss": 0.6712, - "step": 23300 - }, - { - "epoch": 0.20606799978783216, - "grad_norm": 8.16117000579834, - "learning_rate": 4.6565533336869466e-05, - "loss": 0.9361, - "step": 23310 - }, - { - "epoch": 0.20615640304814442, - "grad_norm": 5.261748790740967, - "learning_rate": 4.6564059949197594e-05, - "loss": 0.8432, - "step": 23320 - }, - { - "epoch": 0.20624480630845665, - "grad_norm": 3.1846604347229004, - "learning_rate": 4.656258656152572e-05, - "loss": 0.7282, - "step": 23330 - }, - { - "epoch": 0.20633320956876888, - "grad_norm": 3.816189765930176, - "learning_rate": 4.656111317385386e-05, - "loss": 0.8425, - "step": 23340 - }, - { - "epoch": 0.20642161282908114, - "grad_norm": 5.439279556274414, - "learning_rate": 4.6559639786181986e-05, - "loss": 0.7602, - "step": 23350 - }, - { - "epoch": 0.20651001608939337, - "grad_norm": 4.367234230041504, - "learning_rate": 4.6558166398510114e-05, - "loss": 0.76, - "step": 23360 - }, - { - "epoch": 0.20659841934970563, - "grad_norm": 12.20114517211914, - "learning_rate": 4.655669301083824e-05, - "loss": 0.812, - "step": 23370 - }, - { - "epoch": 0.20668682261001786, - "grad_norm": 8.623457908630371, - "learning_rate": 4.655521962316637e-05, - "loss": 0.834, - "step": 23380 - }, - { - "epoch": 0.2067752258703301, - "grad_norm": 2.464705228805542, - "learning_rate": 4.65537462354945e-05, - "loss": 0.7101, - "step": 23390 - }, - { - "epoch": 0.20686362913064235, - "grad_norm": 4.3877482414245605, - "learning_rate": 4.6552272847822634e-05, - "loss": 0.7512, - "step": 23400 - }, - { - "epoch": 0.20695203239095458, - "grad_norm": 1.7268105745315552, - "learning_rate": 4.6550799460150756e-05, - "loss": 0.7052, - "step": 23410 - }, - { - "epoch": 0.2070404356512668, - "grad_norm": 2.445685863494873, - "learning_rate": 4.654932607247889e-05, - "loss": 0.6972, - "step": 23420 - }, - { - "epoch": 0.20712883891157907, - "grad_norm": 2.92224383354187, - "learning_rate": 4.654785268480702e-05, - "loss": 0.7562, - "step": 23430 - }, - { - "epoch": 0.2072172421718913, - "grad_norm": 10.058605194091797, - "learning_rate": 4.654637929713515e-05, - "loss": 0.7735, - "step": 23440 - }, - { - "epoch": 0.20730564543220353, - "grad_norm": 3.240065097808838, - "learning_rate": 4.6544905909463276e-05, - "loss": 0.9023, - "step": 23450 - }, - { - "epoch": 0.2073940486925158, - "grad_norm": 5.235249996185303, - "learning_rate": 4.654343252179141e-05, - "loss": 0.8871, - "step": 23460 - }, - { - "epoch": 0.20748245195282802, - "grad_norm": 5.795628547668457, - "learning_rate": 4.654195913411953e-05, - "loss": 0.725, - "step": 23470 - }, - { - "epoch": 0.20757085521314025, - "grad_norm": 9.360791206359863, - "learning_rate": 4.654048574644767e-05, - "loss": 0.8301, - "step": 23480 - }, - { - "epoch": 0.2076592584734525, - "grad_norm": 3.732430934906006, - "learning_rate": 4.6539012358775796e-05, - "loss": 0.836, - "step": 23490 - }, - { - "epoch": 0.20774766173376474, - "grad_norm": 18.71736717224121, - "learning_rate": 4.6537538971103925e-05, - "loss": 0.7987, - "step": 23500 - }, - { - "epoch": 0.20783606499407697, - "grad_norm": 5.200915336608887, - "learning_rate": 4.653606558343205e-05, - "loss": 0.8573, - "step": 23510 - }, - { - "epoch": 0.20792446825438923, - "grad_norm": 1.8514925241470337, - "learning_rate": 4.653459219576018e-05, - "loss": 0.8253, - "step": 23520 - }, - { - "epoch": 0.20801287151470146, - "grad_norm": 4.80307149887085, - "learning_rate": 4.653311880808831e-05, - "loss": 0.7087, - "step": 23530 - }, - { - "epoch": 0.2081012747750137, - "grad_norm": 8.400822639465332, - "learning_rate": 4.6531645420416445e-05, - "loss": 0.7526, - "step": 23540 - }, - { - "epoch": 0.20818967803532595, - "grad_norm": 7.24381685256958, - "learning_rate": 4.6530172032744566e-05, - "loss": 0.7866, - "step": 23550 - }, - { - "epoch": 0.20827808129563818, - "grad_norm": 7.329807281494141, - "learning_rate": 4.65286986450727e-05, - "loss": 0.834, - "step": 23560 - }, - { - "epoch": 0.20836648455595042, - "grad_norm": 4.557115077972412, - "learning_rate": 4.652722525740083e-05, - "loss": 0.6873, - "step": 23570 - }, - { - "epoch": 0.20845488781626267, - "grad_norm": 4.804540634155273, - "learning_rate": 4.652575186972896e-05, - "loss": 0.6711, - "step": 23580 - }, - { - "epoch": 0.2085432910765749, - "grad_norm": 3.193962574005127, - "learning_rate": 4.6524278482057087e-05, - "loss": 0.8167, - "step": 23590 - }, - { - "epoch": 0.20863169433688714, - "grad_norm": 5.953191757202148, - "learning_rate": 4.652280509438522e-05, - "loss": 0.9337, - "step": 23600 - }, - { - "epoch": 0.2087200975971994, - "grad_norm": 3.995999574661255, - "learning_rate": 4.652133170671334e-05, - "loss": 0.7633, - "step": 23610 - }, - { - "epoch": 0.20880850085751163, - "grad_norm": 3.6463701725006104, - "learning_rate": 4.651985831904148e-05, - "loss": 0.7166, - "step": 23620 - }, - { - "epoch": 0.20889690411782386, - "grad_norm": 3.6311116218566895, - "learning_rate": 4.65183849313696e-05, - "loss": 0.7353, - "step": 23630 - }, - { - "epoch": 0.20898530737813611, - "grad_norm": 9.220864295959473, - "learning_rate": 4.6516911543697735e-05, - "loss": 0.7907, - "step": 23640 - }, - { - "epoch": 0.20907371063844835, - "grad_norm": 2.683284282684326, - "learning_rate": 4.6515438156025863e-05, - "loss": 0.8561, - "step": 23650 - }, - { - "epoch": 0.20916211389876058, - "grad_norm": 5.696201801300049, - "learning_rate": 4.651396476835399e-05, - "loss": 0.7591, - "step": 23660 - }, - { - "epoch": 0.20925051715907284, - "grad_norm": 3.0992331504821777, - "learning_rate": 4.651249138068212e-05, - "loss": 0.7089, - "step": 23670 - }, - { - "epoch": 0.20933892041938507, - "grad_norm": 3.3280680179595947, - "learning_rate": 4.6511017993010255e-05, - "loss": 0.7249, - "step": 23680 - }, - { - "epoch": 0.2094273236796973, - "grad_norm": 2.1861989498138428, - "learning_rate": 4.650954460533838e-05, - "loss": 0.6126, - "step": 23690 - }, - { - "epoch": 0.20951572694000956, - "grad_norm": 9.512399673461914, - "learning_rate": 4.650807121766651e-05, - "loss": 0.8728, - "step": 23700 - }, - { - "epoch": 0.2096041302003218, - "grad_norm": 4.519936561584473, - "learning_rate": 4.650659782999464e-05, - "loss": 0.7135, - "step": 23710 - }, - { - "epoch": 0.20969253346063402, - "grad_norm": 5.0087738037109375, - "learning_rate": 4.650512444232277e-05, - "loss": 0.7502, - "step": 23720 - }, - { - "epoch": 0.20978093672094628, - "grad_norm": 7.563531875610352, - "learning_rate": 4.65036510546509e-05, - "loss": 0.7536, - "step": 23730 - }, - { - "epoch": 0.2098693399812585, - "grad_norm": 1.7587312459945679, - "learning_rate": 4.6502177666979025e-05, - "loss": 0.7315, - "step": 23740 - }, - { - "epoch": 0.20995774324157074, - "grad_norm": 4.123885154724121, - "learning_rate": 4.6500704279307154e-05, - "loss": 0.7776, - "step": 23750 - }, - { - "epoch": 0.210046146501883, - "grad_norm": 4.363010406494141, - "learning_rate": 4.649923089163529e-05, - "loss": 0.7807, - "step": 23760 - }, - { - "epoch": 0.21013454976219523, - "grad_norm": 5.097952365875244, - "learning_rate": 4.649775750396341e-05, - "loss": 0.8445, - "step": 23770 - }, - { - "epoch": 0.21022295302250746, - "grad_norm": 8.998221397399902, - "learning_rate": 4.6496284116291546e-05, - "loss": 0.7876, - "step": 23780 - }, - { - "epoch": 0.21031135628281972, - "grad_norm": 3.2325947284698486, - "learning_rate": 4.6494810728619674e-05, - "loss": 0.7932, - "step": 23790 - }, - { - "epoch": 0.21039975954313195, - "grad_norm": 6.585747718811035, - "learning_rate": 4.64933373409478e-05, - "loss": 0.7614, - "step": 23800 - }, - { - "epoch": 0.21048816280344418, - "grad_norm": 3.006070137023926, - "learning_rate": 4.649186395327593e-05, - "loss": 0.8762, - "step": 23810 - }, - { - "epoch": 0.21057656606375644, - "grad_norm": 5.100870132446289, - "learning_rate": 4.6490390565604066e-05, - "loss": 0.906, - "step": 23820 - }, - { - "epoch": 0.21066496932406867, - "grad_norm": 6.8270745277404785, - "learning_rate": 4.648891717793219e-05, - "loss": 0.8616, - "step": 23830 - }, - { - "epoch": 0.2107533725843809, - "grad_norm": 5.72274112701416, - "learning_rate": 4.648744379026032e-05, - "loss": 0.8046, - "step": 23840 - }, - { - "epoch": 0.21084177584469316, - "grad_norm": 5.608553886413574, - "learning_rate": 4.648597040258845e-05, - "loss": 0.8117, - "step": 23850 - }, - { - "epoch": 0.2109301791050054, - "grad_norm": 4.716365814208984, - "learning_rate": 4.648449701491658e-05, - "loss": 0.8121, - "step": 23860 - }, - { - "epoch": 0.21101858236531762, - "grad_norm": 8.17506217956543, - "learning_rate": 4.648302362724471e-05, - "loss": 0.7239, - "step": 23870 - }, - { - "epoch": 0.21110698562562988, - "grad_norm": 5.354269981384277, - "learning_rate": 4.6481550239572836e-05, - "loss": 0.8251, - "step": 23880 - }, - { - "epoch": 0.2111953888859421, - "grad_norm": 7.155550479888916, - "learning_rate": 4.6480076851900964e-05, - "loss": 0.855, - "step": 23890 - }, - { - "epoch": 0.21128379214625434, - "grad_norm": 4.716402053833008, - "learning_rate": 4.64786034642291e-05, - "loss": 0.8917, - "step": 23900 - }, - { - "epoch": 0.2113721954065666, - "grad_norm": 6.345791339874268, - "learning_rate": 4.647713007655722e-05, - "loss": 0.7738, - "step": 23910 - }, - { - "epoch": 0.21146059866687883, - "grad_norm": 2.3480265140533447, - "learning_rate": 4.6475656688885356e-05, - "loss": 0.8602, - "step": 23920 - }, - { - "epoch": 0.2115490019271911, - "grad_norm": 3.5254249572753906, - "learning_rate": 4.6474183301213484e-05, - "loss": 0.838, - "step": 23930 - }, - { - "epoch": 0.21163740518750332, - "grad_norm": 3.4268798828125, - "learning_rate": 4.647270991354161e-05, - "loss": 0.6845, - "step": 23940 - }, - { - "epoch": 0.21172580844781555, - "grad_norm": 1.6763031482696533, - "learning_rate": 4.647123652586974e-05, - "loss": 0.6627, - "step": 23950 - }, - { - "epoch": 0.2118142117081278, - "grad_norm": 7.624725818634033, - "learning_rate": 4.6469763138197876e-05, - "loss": 0.7902, - "step": 23960 - }, - { - "epoch": 0.21190261496844004, - "grad_norm": 4.639545440673828, - "learning_rate": 4.6468289750526e-05, - "loss": 0.6541, - "step": 23970 - }, - { - "epoch": 0.21199101822875227, - "grad_norm": 2.399841547012329, - "learning_rate": 4.646681636285413e-05, - "loss": 0.8074, - "step": 23980 - }, - { - "epoch": 0.21207942148906453, - "grad_norm": 1.593639850616455, - "learning_rate": 4.6465342975182255e-05, - "loss": 0.7049, - "step": 23990 - }, - { - "epoch": 0.21216782474937676, - "grad_norm": 3.583134412765503, - "learning_rate": 4.646386958751039e-05, - "loss": 0.9288, - "step": 24000 - }, - { - "epoch": 0.212256228009689, - "grad_norm": 3.410632371902466, - "learning_rate": 4.646239619983852e-05, - "loss": 0.7309, - "step": 24010 - }, - { - "epoch": 0.21234463127000125, - "grad_norm": 1.9905996322631836, - "learning_rate": 4.6460922812166646e-05, - "loss": 0.6831, - "step": 24020 - }, - { - "epoch": 0.21243303453031348, - "grad_norm": 5.136494159698486, - "learning_rate": 4.6459449424494775e-05, - "loss": 0.8327, - "step": 24030 - }, - { - "epoch": 0.2125214377906257, - "grad_norm": 8.443650245666504, - "learning_rate": 4.645797603682291e-05, - "loss": 0.768, - "step": 24040 - }, - { - "epoch": 0.21260984105093797, - "grad_norm": 8.261244773864746, - "learning_rate": 4.645650264915103e-05, - "loss": 0.7784, - "step": 24050 - }, - { - "epoch": 0.2126982443112502, - "grad_norm": 5.827929496765137, - "learning_rate": 4.645502926147917e-05, - "loss": 0.7436, - "step": 24060 - }, - { - "epoch": 0.21278664757156243, - "grad_norm": 7.286040782928467, - "learning_rate": 4.6453555873807295e-05, - "loss": 0.7721, - "step": 24070 - }, - { - "epoch": 0.2128750508318747, - "grad_norm": 5.581329345703125, - "learning_rate": 4.645208248613542e-05, - "loss": 0.8695, - "step": 24080 - }, - { - "epoch": 0.21296345409218692, - "grad_norm": 2.1000869274139404, - "learning_rate": 4.645060909846355e-05, - "loss": 0.7413, - "step": 24090 - }, - { - "epoch": 0.21305185735249915, - "grad_norm": 4.673179626464844, - "learning_rate": 4.644913571079168e-05, - "loss": 0.6993, - "step": 24100 - }, - { - "epoch": 0.2131402606128114, - "grad_norm": 5.403928279876709, - "learning_rate": 4.644766232311981e-05, - "loss": 0.7247, - "step": 24110 - }, - { - "epoch": 0.21322866387312364, - "grad_norm": 2.7159504890441895, - "learning_rate": 4.6446188935447944e-05, - "loss": 0.8364, - "step": 24120 - }, - { - "epoch": 0.21331706713343587, - "grad_norm": 6.9059062004089355, - "learning_rate": 4.644471554777607e-05, - "loss": 0.9024, - "step": 24130 - }, - { - "epoch": 0.21340547039374813, - "grad_norm": 8.990559577941895, - "learning_rate": 4.64432421601042e-05, - "loss": 0.7564, - "step": 24140 - }, - { - "epoch": 0.21349387365406036, - "grad_norm": 8.319303512573242, - "learning_rate": 4.644176877243233e-05, - "loss": 0.7636, - "step": 24150 - }, - { - "epoch": 0.2135822769143726, - "grad_norm": 5.187260627746582, - "learning_rate": 4.644029538476046e-05, - "loss": 0.7803, - "step": 24160 - }, - { - "epoch": 0.21367068017468485, - "grad_norm": 4.388763904571533, - "learning_rate": 4.6438821997088585e-05, - "loss": 0.7169, - "step": 24170 - }, - { - "epoch": 0.21375908343499708, - "grad_norm": 8.37094497680664, - "learning_rate": 4.643734860941672e-05, - "loss": 0.7542, - "step": 24180 - }, - { - "epoch": 0.2138474866953093, - "grad_norm": 2.8514420986175537, - "learning_rate": 4.643587522174485e-05, - "loss": 0.7043, - "step": 24190 - }, - { - "epoch": 0.21393588995562157, - "grad_norm": 11.018105506896973, - "learning_rate": 4.643440183407298e-05, - "loss": 0.8111, - "step": 24200 - }, - { - "epoch": 0.2140242932159338, - "grad_norm": 8.870553016662598, - "learning_rate": 4.6432928446401105e-05, - "loss": 0.7464, - "step": 24210 - }, - { - "epoch": 0.21411269647624603, - "grad_norm": 2.422752857208252, - "learning_rate": 4.6431455058729234e-05, - "loss": 0.7303, - "step": 24220 - }, - { - "epoch": 0.2142010997365583, - "grad_norm": 3.441040515899658, - "learning_rate": 4.642998167105736e-05, - "loss": 0.8015, - "step": 24230 - }, - { - "epoch": 0.21428950299687052, - "grad_norm": 4.044212818145752, - "learning_rate": 4.642850828338549e-05, - "loss": 0.6903, - "step": 24240 - }, - { - "epoch": 0.21437790625718275, - "grad_norm": 4.94411563873291, - "learning_rate": 4.6427034895713626e-05, - "loss": 0.7756, - "step": 24250 - }, - { - "epoch": 0.214466309517495, - "grad_norm": 20.273862838745117, - "learning_rate": 4.6425561508041754e-05, - "loss": 0.7523, - "step": 24260 - }, - { - "epoch": 0.21455471277780724, - "grad_norm": 3.986461639404297, - "learning_rate": 4.642408812036988e-05, - "loss": 0.8173, - "step": 24270 - }, - { - "epoch": 0.21464311603811947, - "grad_norm": 4.408081531524658, - "learning_rate": 4.642261473269801e-05, - "loss": 0.8444, - "step": 24280 - }, - { - "epoch": 0.21473151929843173, - "grad_norm": 4.525137901306152, - "learning_rate": 4.642114134502614e-05, - "loss": 0.9284, - "step": 24290 - }, - { - "epoch": 0.21481992255874396, - "grad_norm": 4.937618732452393, - "learning_rate": 4.641966795735427e-05, - "loss": 0.7936, - "step": 24300 - }, - { - "epoch": 0.2149083258190562, - "grad_norm": 2.9195120334625244, - "learning_rate": 4.64181945696824e-05, - "loss": 0.8288, - "step": 24310 - }, - { - "epoch": 0.21499672907936845, - "grad_norm": 8.352294921875, - "learning_rate": 4.641672118201053e-05, - "loss": 0.8597, - "step": 24320 - }, - { - "epoch": 0.21508513233968068, - "grad_norm": 6.399908065795898, - "learning_rate": 4.641524779433866e-05, - "loss": 0.7747, - "step": 24330 - }, - { - "epoch": 0.21517353559999292, - "grad_norm": 6.880349159240723, - "learning_rate": 4.641377440666679e-05, - "loss": 0.7494, - "step": 24340 - }, - { - "epoch": 0.21526193886030517, - "grad_norm": 4.56484317779541, - "learning_rate": 4.6412301018994916e-05, - "loss": 0.6893, - "step": 24350 - }, - { - "epoch": 0.2153503421206174, - "grad_norm": 4.912026405334473, - "learning_rate": 4.6410827631323044e-05, - "loss": 0.9449, - "step": 24360 - }, - { - "epoch": 0.21543874538092964, - "grad_norm": 3.681528329849243, - "learning_rate": 4.640935424365118e-05, - "loss": 0.7282, - "step": 24370 - }, - { - "epoch": 0.2155271486412419, - "grad_norm": 3.427675724029541, - "learning_rate": 4.64078808559793e-05, - "loss": 0.7578, - "step": 24380 - }, - { - "epoch": 0.21561555190155413, - "grad_norm": 6.079005241394043, - "learning_rate": 4.6406407468307436e-05, - "loss": 0.8165, - "step": 24390 - }, - { - "epoch": 0.21570395516186636, - "grad_norm": 5.2435784339904785, - "learning_rate": 4.6404934080635565e-05, - "loss": 0.8901, - "step": 24400 - }, - { - "epoch": 0.21579235842217862, - "grad_norm": 6.158112525939941, - "learning_rate": 4.640346069296369e-05, - "loss": 0.6585, - "step": 24410 - }, - { - "epoch": 0.21588076168249085, - "grad_norm": 6.721048355102539, - "learning_rate": 4.640198730529182e-05, - "loss": 0.6893, - "step": 24420 - }, - { - "epoch": 0.21596916494280308, - "grad_norm": 3.782916784286499, - "learning_rate": 4.6400513917619956e-05, - "loss": 0.7559, - "step": 24430 - }, - { - "epoch": 0.21605756820311534, - "grad_norm": 4.375424861907959, - "learning_rate": 4.639904052994808e-05, - "loss": 0.7981, - "step": 24440 - }, - { - "epoch": 0.21614597146342757, - "grad_norm": 6.188992023468018, - "learning_rate": 4.639756714227621e-05, - "loss": 0.853, - "step": 24450 - }, - { - "epoch": 0.21623437472373983, - "grad_norm": 3.779165267944336, - "learning_rate": 4.6396093754604335e-05, - "loss": 0.8897, - "step": 24460 - }, - { - "epoch": 0.21632277798405206, - "grad_norm": 4.001415729522705, - "learning_rate": 4.639462036693247e-05, - "loss": 0.7805, - "step": 24470 - }, - { - "epoch": 0.2164111812443643, - "grad_norm": 9.615870475769043, - "learning_rate": 4.63931469792606e-05, - "loss": 0.9249, - "step": 24480 - }, - { - "epoch": 0.21649958450467655, - "grad_norm": 2.7439322471618652, - "learning_rate": 4.6391673591588726e-05, - "loss": 0.7208, - "step": 24490 - }, - { - "epoch": 0.21658798776498878, - "grad_norm": 4.920548439025879, - "learning_rate": 4.6390200203916855e-05, - "loss": 0.8799, - "step": 24500 - }, - { - "epoch": 0.216676391025301, - "grad_norm": 4.9506072998046875, - "learning_rate": 4.638872681624499e-05, - "loss": 0.815, - "step": 24510 - }, - { - "epoch": 0.21676479428561327, - "grad_norm": 4.992445468902588, - "learning_rate": 4.638725342857311e-05, - "loss": 0.7988, - "step": 24520 - }, - { - "epoch": 0.2168531975459255, - "grad_norm": 3.184152841567993, - "learning_rate": 4.638578004090125e-05, - "loss": 0.7298, - "step": 24530 - }, - { - "epoch": 0.21694160080623773, - "grad_norm": 3.891679048538208, - "learning_rate": 4.6384306653229375e-05, - "loss": 0.7793, - "step": 24540 - }, - { - "epoch": 0.21703000406655, - "grad_norm": 2.918549060821533, - "learning_rate": 4.6382833265557503e-05, - "loss": 0.7285, - "step": 24550 - }, - { - "epoch": 0.21711840732686222, - "grad_norm": 2.4985158443450928, - "learning_rate": 4.638135987788563e-05, - "loss": 0.9261, - "step": 24560 - }, - { - "epoch": 0.21720681058717445, - "grad_norm": 4.97471284866333, - "learning_rate": 4.637988649021376e-05, - "loss": 0.8452, - "step": 24570 - }, - { - "epoch": 0.2172952138474867, - "grad_norm": 3.9032785892486572, - "learning_rate": 4.637841310254189e-05, - "loss": 0.7743, - "step": 24580 - }, - { - "epoch": 0.21738361710779894, - "grad_norm": 4.991990566253662, - "learning_rate": 4.6376939714870024e-05, - "loss": 0.7649, - "step": 24590 - }, - { - "epoch": 0.21747202036811117, - "grad_norm": 2.503690004348755, - "learning_rate": 4.6375466327198145e-05, - "loss": 0.7208, - "step": 24600 - }, - { - "epoch": 0.21756042362842343, - "grad_norm": 3.138916254043579, - "learning_rate": 4.637399293952628e-05, - "loss": 0.9338, - "step": 24610 - }, - { - "epoch": 0.21764882688873566, - "grad_norm": 5.933905601501465, - "learning_rate": 4.637251955185441e-05, - "loss": 0.7391, - "step": 24620 - }, - { - "epoch": 0.2177372301490479, - "grad_norm": 4.459654808044434, - "learning_rate": 4.637104616418254e-05, - "loss": 0.9008, - "step": 24630 - }, - { - "epoch": 0.21782563340936015, - "grad_norm": 4.548059940338135, - "learning_rate": 4.6369572776510665e-05, - "loss": 0.8563, - "step": 24640 - }, - { - "epoch": 0.21791403666967238, - "grad_norm": 1.5870684385299683, - "learning_rate": 4.63680993888388e-05, - "loss": 0.7146, - "step": 24650 - }, - { - "epoch": 0.2180024399299846, - "grad_norm": 6.337950229644775, - "learning_rate": 4.636662600116692e-05, - "loss": 0.8823, - "step": 24660 - }, - { - "epoch": 0.21809084319029687, - "grad_norm": 3.137346029281616, - "learning_rate": 4.636515261349506e-05, - "loss": 0.8795, - "step": 24670 - }, - { - "epoch": 0.2181792464506091, - "grad_norm": 4.048831462860107, - "learning_rate": 4.6363679225823186e-05, - "loss": 0.8347, - "step": 24680 - }, - { - "epoch": 0.21826764971092133, - "grad_norm": 5.1624555587768555, - "learning_rate": 4.6362205838151314e-05, - "loss": 0.7449, - "step": 24690 - }, - { - "epoch": 0.2183560529712336, - "grad_norm": 3.5979888439178467, - "learning_rate": 4.636073245047944e-05, - "loss": 0.8668, - "step": 24700 - }, - { - "epoch": 0.21844445623154582, - "grad_norm": 5.237533092498779, - "learning_rate": 4.635925906280757e-05, - "loss": 0.8265, - "step": 24710 - }, - { - "epoch": 0.21853285949185805, - "grad_norm": 4.043185710906982, - "learning_rate": 4.63577856751357e-05, - "loss": 0.8415, - "step": 24720 - }, - { - "epoch": 0.2186212627521703, - "grad_norm": 3.46463942527771, - "learning_rate": 4.6356312287463834e-05, - "loss": 0.8238, - "step": 24730 - }, - { - "epoch": 0.21870966601248254, - "grad_norm": 3.736070156097412, - "learning_rate": 4.6354838899791956e-05, - "loss": 0.7936, - "step": 24740 - }, - { - "epoch": 0.21879806927279477, - "grad_norm": 3.1564292907714844, - "learning_rate": 4.635336551212009e-05, - "loss": 0.7544, - "step": 24750 - }, - { - "epoch": 0.21888647253310703, - "grad_norm": 2.714038848876953, - "learning_rate": 4.635189212444822e-05, - "loss": 0.8748, - "step": 24760 - }, - { - "epoch": 0.21897487579341926, - "grad_norm": 2.51503324508667, - "learning_rate": 4.635041873677635e-05, - "loss": 0.8479, - "step": 24770 - }, - { - "epoch": 0.2190632790537315, - "grad_norm": 3.2833774089813232, - "learning_rate": 4.6348945349104476e-05, - "loss": 0.9123, - "step": 24780 - }, - { - "epoch": 0.21915168231404375, - "grad_norm": 11.11308479309082, - "learning_rate": 4.634747196143261e-05, - "loss": 0.8166, - "step": 24790 - }, - { - "epoch": 0.21924008557435598, - "grad_norm": 6.787374019622803, - "learning_rate": 4.634599857376073e-05, - "loss": 0.8701, - "step": 24800 - }, - { - "epoch": 0.2193284888346682, - "grad_norm": 5.895148754119873, - "learning_rate": 4.634452518608887e-05, - "loss": 0.7403, - "step": 24810 - }, - { - "epoch": 0.21941689209498047, - "grad_norm": 3.7027394771575928, - "learning_rate": 4.634305179841699e-05, - "loss": 0.8693, - "step": 24820 - }, - { - "epoch": 0.2195052953552927, - "grad_norm": 4.522027492523193, - "learning_rate": 4.6341578410745124e-05, - "loss": 0.7761, - "step": 24830 - }, - { - "epoch": 0.21959369861560493, - "grad_norm": 9.361669540405273, - "learning_rate": 4.634010502307325e-05, - "loss": 0.6888, - "step": 24840 - }, - { - "epoch": 0.2196821018759172, - "grad_norm": 4.22054386138916, - "learning_rate": 4.633863163540138e-05, - "loss": 0.7377, - "step": 24850 - }, - { - "epoch": 0.21977050513622942, - "grad_norm": 15.273462295532227, - "learning_rate": 4.633715824772951e-05, - "loss": 0.7282, - "step": 24860 - }, - { - "epoch": 0.21985890839654165, - "grad_norm": 6.5163493156433105, - "learning_rate": 4.6335684860057645e-05, - "loss": 0.8005, - "step": 24870 - }, - { - "epoch": 0.2199473116568539, - "grad_norm": 6.353625774383545, - "learning_rate": 4.6334211472385766e-05, - "loss": 0.7718, - "step": 24880 - }, - { - "epoch": 0.22003571491716614, - "grad_norm": 2.002206563949585, - "learning_rate": 4.63327380847139e-05, - "loss": 0.8258, - "step": 24890 - }, - { - "epoch": 0.22012411817747837, - "grad_norm": 2.044071674346924, - "learning_rate": 4.633126469704203e-05, - "loss": 0.8062, - "step": 24900 - }, - { - "epoch": 0.22021252143779063, - "grad_norm": 7.506868362426758, - "learning_rate": 4.632979130937016e-05, - "loss": 0.7715, - "step": 24910 - }, - { - "epoch": 0.22030092469810286, - "grad_norm": 4.116670608520508, - "learning_rate": 4.6328317921698286e-05, - "loss": 0.7053, - "step": 24920 - }, - { - "epoch": 0.2203893279584151, - "grad_norm": 3.8336546421051025, - "learning_rate": 4.6326844534026415e-05, - "loss": 0.8956, - "step": 24930 - }, - { - "epoch": 0.22047773121872735, - "grad_norm": 2.41890549659729, - "learning_rate": 4.632537114635454e-05, - "loss": 0.7353, - "step": 24940 - }, - { - "epoch": 0.22056613447903958, - "grad_norm": 5.446094512939453, - "learning_rate": 4.632389775868268e-05, - "loss": 0.7688, - "step": 24950 - }, - { - "epoch": 0.2206545377393518, - "grad_norm": 3.2237637042999268, - "learning_rate": 4.63224243710108e-05, - "loss": 0.7948, - "step": 24960 - }, - { - "epoch": 0.22074294099966407, - "grad_norm": 2.7062480449676514, - "learning_rate": 4.6320950983338935e-05, - "loss": 0.7024, - "step": 24970 - }, - { - "epoch": 0.2208313442599763, - "grad_norm": 2.661411762237549, - "learning_rate": 4.631947759566706e-05, - "loss": 0.7522, - "step": 24980 - }, - { - "epoch": 0.22091974752028856, - "grad_norm": 2.45198392868042, - "learning_rate": 4.631800420799519e-05, - "loss": 0.6827, - "step": 24990 - }, - { - "epoch": 0.2210081507806008, - "grad_norm": 15.193877220153809, - "learning_rate": 4.631653082032332e-05, - "loss": 0.8492, - "step": 25000 - }, - { - "epoch": 0.22109655404091302, - "grad_norm": 6.492677688598633, - "learning_rate": 4.6315057432651455e-05, - "loss": 0.8934, - "step": 25010 - }, - { - "epoch": 0.22118495730122528, - "grad_norm": 4.400557518005371, - "learning_rate": 4.631358404497958e-05, - "loss": 0.7585, - "step": 25020 - }, - { - "epoch": 0.2212733605615375, - "grad_norm": 4.58823823928833, - "learning_rate": 4.631211065730771e-05, - "loss": 0.8256, - "step": 25030 - }, - { - "epoch": 0.22136176382184974, - "grad_norm": 2.6293187141418457, - "learning_rate": 4.631063726963584e-05, - "loss": 0.7012, - "step": 25040 - }, - { - "epoch": 0.221450167082162, - "grad_norm": 4.5372395515441895, - "learning_rate": 4.630916388196397e-05, - "loss": 0.8542, - "step": 25050 - }, - { - "epoch": 0.22153857034247423, - "grad_norm": 6.668642997741699, - "learning_rate": 4.63076904942921e-05, - "loss": 0.9586, - "step": 25060 - }, - { - "epoch": 0.22162697360278646, - "grad_norm": 6.260819435119629, - "learning_rate": 4.6306217106620225e-05, - "loss": 0.8067, - "step": 25070 - }, - { - "epoch": 0.22171537686309872, - "grad_norm": 6.783611297607422, - "learning_rate": 4.6304743718948354e-05, - "loss": 0.8143, - "step": 25080 - }, - { - "epoch": 0.22180378012341095, - "grad_norm": 5.217430114746094, - "learning_rate": 4.630327033127649e-05, - "loss": 0.807, - "step": 25090 - }, - { - "epoch": 0.22189218338372318, - "grad_norm": 7.699872016906738, - "learning_rate": 4.630179694360462e-05, - "loss": 0.7318, - "step": 25100 - }, - { - "epoch": 0.22198058664403544, - "grad_norm": 3.798049211502075, - "learning_rate": 4.6300323555932745e-05, - "loss": 0.7622, - "step": 25110 - }, - { - "epoch": 0.22206898990434767, - "grad_norm": 8.742514610290527, - "learning_rate": 4.6298850168260874e-05, - "loss": 0.7571, - "step": 25120 - }, - { - "epoch": 0.2221573931646599, - "grad_norm": 2.609999418258667, - "learning_rate": 4.6297376780589e-05, - "loss": 0.7713, - "step": 25130 - }, - { - "epoch": 0.22224579642497216, - "grad_norm": 3.4092695713043213, - "learning_rate": 4.629590339291713e-05, - "loss": 0.7371, - "step": 25140 - }, - { - "epoch": 0.2223341996852844, - "grad_norm": 3.388076066970825, - "learning_rate": 4.6294430005245266e-05, - "loss": 0.8635, - "step": 25150 - }, - { - "epoch": 0.22242260294559663, - "grad_norm": 4.0364990234375, - "learning_rate": 4.6292956617573394e-05, - "loss": 0.777, - "step": 25160 - }, - { - "epoch": 0.22251100620590888, - "grad_norm": 8.548382759094238, - "learning_rate": 4.629148322990152e-05, - "loss": 0.7714, - "step": 25170 - }, - { - "epoch": 0.22259940946622112, - "grad_norm": 9.222799301147461, - "learning_rate": 4.629000984222965e-05, - "loss": 0.7877, - "step": 25180 - }, - { - "epoch": 0.22268781272653335, - "grad_norm": 4.6049485206604, - "learning_rate": 4.628853645455778e-05, - "loss": 0.7727, - "step": 25190 - }, - { - "epoch": 0.2227762159868456, - "grad_norm": 14.871455192565918, - "learning_rate": 4.628706306688591e-05, - "loss": 0.7477, - "step": 25200 - }, - { - "epoch": 0.22286461924715784, - "grad_norm": 4.842946529388428, - "learning_rate": 4.6285589679214036e-05, - "loss": 0.7824, - "step": 25210 - }, - { - "epoch": 0.22295302250747007, - "grad_norm": 4.010987281799316, - "learning_rate": 4.628411629154217e-05, - "loss": 0.7365, - "step": 25220 - }, - { - "epoch": 0.22304142576778233, - "grad_norm": 5.73057746887207, - "learning_rate": 4.62826429038703e-05, - "loss": 0.7973, - "step": 25230 - }, - { - "epoch": 0.22312982902809456, - "grad_norm": 3.9699220657348633, - "learning_rate": 4.628116951619843e-05, - "loss": 0.8923, - "step": 25240 - }, - { - "epoch": 0.2232182322884068, - "grad_norm": 2.9780991077423096, - "learning_rate": 4.6279696128526556e-05, - "loss": 0.6232, - "step": 25250 - }, - { - "epoch": 0.22330663554871905, - "grad_norm": 5.854918003082275, - "learning_rate": 4.6278222740854684e-05, - "loss": 0.9031, - "step": 25260 - }, - { - "epoch": 0.22339503880903128, - "grad_norm": 7.665713787078857, - "learning_rate": 4.627674935318281e-05, - "loss": 0.8961, - "step": 25270 - }, - { - "epoch": 0.2234834420693435, - "grad_norm": 4.15420389175415, - "learning_rate": 4.627527596551095e-05, - "loss": 0.8783, - "step": 25280 - }, - { - "epoch": 0.22357184532965577, - "grad_norm": 3.887390375137329, - "learning_rate": 4.627380257783907e-05, - "loss": 0.8838, - "step": 25290 - }, - { - "epoch": 0.223660248589968, - "grad_norm": 2.9458789825439453, - "learning_rate": 4.6272329190167204e-05, - "loss": 0.7953, - "step": 25300 - }, - { - "epoch": 0.22374865185028023, - "grad_norm": 7.877856731414795, - "learning_rate": 4.627085580249533e-05, - "loss": 0.8599, - "step": 25310 - }, - { - "epoch": 0.2238370551105925, - "grad_norm": 2.4428250789642334, - "learning_rate": 4.626938241482346e-05, - "loss": 0.7414, - "step": 25320 - }, - { - "epoch": 0.22392545837090472, - "grad_norm": 4.274850368499756, - "learning_rate": 4.626790902715159e-05, - "loss": 0.7594, - "step": 25330 - }, - { - "epoch": 0.22401386163121695, - "grad_norm": 7.199704170227051, - "learning_rate": 4.6266435639479725e-05, - "loss": 0.7728, - "step": 25340 - }, - { - "epoch": 0.2241022648915292, - "grad_norm": 4.567758560180664, - "learning_rate": 4.6264962251807846e-05, - "loss": 0.7248, - "step": 25350 - }, - { - "epoch": 0.22419066815184144, - "grad_norm": 4.301654815673828, - "learning_rate": 4.626348886413598e-05, - "loss": 0.7015, - "step": 25360 - }, - { - "epoch": 0.22427907141215367, - "grad_norm": 2.2330312728881836, - "learning_rate": 4.626201547646411e-05, - "loss": 0.6446, - "step": 25370 - }, - { - "epoch": 0.22436747467246593, - "grad_norm": 6.475268840789795, - "learning_rate": 4.626054208879224e-05, - "loss": 0.7297, - "step": 25380 - }, - { - "epoch": 0.22445587793277816, - "grad_norm": 2.9262239933013916, - "learning_rate": 4.6259068701120366e-05, - "loss": 0.7867, - "step": 25390 - }, - { - "epoch": 0.2245442811930904, - "grad_norm": 6.337355613708496, - "learning_rate": 4.6257595313448495e-05, - "loss": 0.8341, - "step": 25400 - }, - { - "epoch": 0.22463268445340265, - "grad_norm": 2.55621337890625, - "learning_rate": 4.625612192577662e-05, - "loss": 0.8733, - "step": 25410 - }, - { - "epoch": 0.22472108771371488, - "grad_norm": 7.797390460968018, - "learning_rate": 4.625464853810476e-05, - "loss": 0.7046, - "step": 25420 - }, - { - "epoch": 0.2248094909740271, - "grad_norm": 9.955615997314453, - "learning_rate": 4.625317515043288e-05, - "loss": 0.7353, - "step": 25430 - }, - { - "epoch": 0.22489789423433937, - "grad_norm": 2.787454605102539, - "learning_rate": 4.6251701762761015e-05, - "loss": 0.7961, - "step": 25440 - }, - { - "epoch": 0.2249862974946516, - "grad_norm": 5.0545549392700195, - "learning_rate": 4.625022837508914e-05, - "loss": 0.7113, - "step": 25450 - }, - { - "epoch": 0.22507470075496383, - "grad_norm": 2.6684982776641846, - "learning_rate": 4.624875498741727e-05, - "loss": 0.7372, - "step": 25460 - }, - { - "epoch": 0.2251631040152761, - "grad_norm": 2.917710542678833, - "learning_rate": 4.62472815997454e-05, - "loss": 0.7434, - "step": 25470 - }, - { - "epoch": 0.22525150727558832, - "grad_norm": 3.460366725921631, - "learning_rate": 4.6245808212073535e-05, - "loss": 0.7249, - "step": 25480 - }, - { - "epoch": 0.22533991053590055, - "grad_norm": 3.279576539993286, - "learning_rate": 4.624433482440166e-05, - "loss": 0.7065, - "step": 25490 - }, - { - "epoch": 0.2254283137962128, - "grad_norm": 2.606126070022583, - "learning_rate": 4.624286143672979e-05, - "loss": 0.8126, - "step": 25500 - }, - { - "epoch": 0.22551671705652504, - "grad_norm": 2.7988295555114746, - "learning_rate": 4.6241388049057913e-05, - "loss": 0.8257, - "step": 25510 - }, - { - "epoch": 0.2256051203168373, - "grad_norm": 2.0422515869140625, - "learning_rate": 4.623991466138605e-05, - "loss": 0.6915, - "step": 25520 - }, - { - "epoch": 0.22569352357714953, - "grad_norm": 2.5446791648864746, - "learning_rate": 4.623844127371418e-05, - "loss": 0.6996, - "step": 25530 - }, - { - "epoch": 0.22578192683746176, - "grad_norm": 7.3593549728393555, - "learning_rate": 4.6236967886042305e-05, - "loss": 0.8156, - "step": 25540 - }, - { - "epoch": 0.22587033009777402, - "grad_norm": 7.084205150604248, - "learning_rate": 4.6235494498370434e-05, - "loss": 0.7561, - "step": 25550 - }, - { - "epoch": 0.22595873335808625, - "grad_norm": 4.026143550872803, - "learning_rate": 4.623402111069857e-05, - "loss": 0.8163, - "step": 25560 - }, - { - "epoch": 0.22604713661839848, - "grad_norm": 9.809626579284668, - "learning_rate": 4.623254772302669e-05, - "loss": 0.7599, - "step": 25570 - }, - { - "epoch": 0.22613553987871074, - "grad_norm": 5.922295093536377, - "learning_rate": 4.6231074335354825e-05, - "loss": 0.7928, - "step": 25580 - }, - { - "epoch": 0.22622394313902297, - "grad_norm": 8.087105751037598, - "learning_rate": 4.6229600947682954e-05, - "loss": 0.8638, - "step": 25590 - }, - { - "epoch": 0.2263123463993352, - "grad_norm": 1.9569982290267944, - "learning_rate": 4.622812756001108e-05, - "loss": 0.817, - "step": 25600 - }, - { - "epoch": 0.22640074965964746, - "grad_norm": 3.5140202045440674, - "learning_rate": 4.622665417233921e-05, - "loss": 0.7167, - "step": 25610 - }, - { - "epoch": 0.2264891529199597, - "grad_norm": 1.2821376323699951, - "learning_rate": 4.6225180784667346e-05, - "loss": 0.6847, - "step": 25620 - }, - { - "epoch": 0.22657755618027192, - "grad_norm": 4.603971004486084, - "learning_rate": 4.622370739699547e-05, - "loss": 0.6777, - "step": 25630 - }, - { - "epoch": 0.22666595944058418, - "grad_norm": 2.99287748336792, - "learning_rate": 4.62222340093236e-05, - "loss": 0.7352, - "step": 25640 - }, - { - "epoch": 0.2267543627008964, - "grad_norm": 13.168510437011719, - "learning_rate": 4.6220760621651724e-05, - "loss": 0.889, - "step": 25650 - }, - { - "epoch": 0.22684276596120864, - "grad_norm": 3.5099494457244873, - "learning_rate": 4.621928723397986e-05, - "loss": 0.8372, - "step": 25660 - }, - { - "epoch": 0.2269311692215209, - "grad_norm": 1.6717056035995483, - "learning_rate": 4.621781384630799e-05, - "loss": 0.584, - "step": 25670 - }, - { - "epoch": 0.22701957248183313, - "grad_norm": 2.5716476440429688, - "learning_rate": 4.6216340458636116e-05, - "loss": 0.8171, - "step": 25680 - }, - { - "epoch": 0.22710797574214536, - "grad_norm": 7.323851585388184, - "learning_rate": 4.6214867070964244e-05, - "loss": 0.7505, - "step": 25690 - }, - { - "epoch": 0.22719637900245762, - "grad_norm": 9.877863883972168, - "learning_rate": 4.621339368329238e-05, - "loss": 0.7003, - "step": 25700 - }, - { - "epoch": 0.22728478226276985, - "grad_norm": 5.2179646492004395, - "learning_rate": 4.62119202956205e-05, - "loss": 0.903, - "step": 25710 - }, - { - "epoch": 0.22737318552308208, - "grad_norm": 7.904167175292969, - "learning_rate": 4.6210446907948636e-05, - "loss": 0.835, - "step": 25720 - }, - { - "epoch": 0.22746158878339434, - "grad_norm": 4.367486476898193, - "learning_rate": 4.6208973520276764e-05, - "loss": 0.6942, - "step": 25730 - }, - { - "epoch": 0.22754999204370657, - "grad_norm": 8.75403118133545, - "learning_rate": 4.620750013260489e-05, - "loss": 0.7477, - "step": 25740 - }, - { - "epoch": 0.2276383953040188, - "grad_norm": 4.122621536254883, - "learning_rate": 4.620602674493302e-05, - "loss": 0.872, - "step": 25750 - }, - { - "epoch": 0.22772679856433106, - "grad_norm": 2.2306032180786133, - "learning_rate": 4.620455335726115e-05, - "loss": 0.7103, - "step": 25760 - }, - { - "epoch": 0.2278152018246433, - "grad_norm": 13.444779396057129, - "learning_rate": 4.620307996958928e-05, - "loss": 0.8849, - "step": 25770 - }, - { - "epoch": 0.22790360508495552, - "grad_norm": 10.491046905517578, - "learning_rate": 4.620160658191741e-05, - "loss": 0.8103, - "step": 25780 - }, - { - "epoch": 0.22799200834526778, - "grad_norm": 2.5766170024871826, - "learning_rate": 4.6200133194245534e-05, - "loss": 0.8632, - "step": 25790 - }, - { - "epoch": 0.22808041160558, - "grad_norm": 2.156754493713379, - "learning_rate": 4.619865980657367e-05, - "loss": 0.7755, - "step": 25800 - }, - { - "epoch": 0.22816881486589224, - "grad_norm": 4.522340774536133, - "learning_rate": 4.61971864189018e-05, - "loss": 0.6697, - "step": 25810 - }, - { - "epoch": 0.2282572181262045, - "grad_norm": 5.178337574005127, - "learning_rate": 4.6195713031229926e-05, - "loss": 0.7709, - "step": 25820 - }, - { - "epoch": 0.22834562138651673, - "grad_norm": 4.399623870849609, - "learning_rate": 4.6194239643558055e-05, - "loss": 0.753, - "step": 25830 - }, - { - "epoch": 0.22843402464682896, - "grad_norm": 10.465398788452148, - "learning_rate": 4.619276625588619e-05, - "loss": 0.764, - "step": 25840 - }, - { - "epoch": 0.22852242790714122, - "grad_norm": 4.597443103790283, - "learning_rate": 4.619129286821431e-05, - "loss": 0.8505, - "step": 25850 - }, - { - "epoch": 0.22861083116745345, - "grad_norm": 2.543112277984619, - "learning_rate": 4.6189819480542447e-05, - "loss": 0.8435, - "step": 25860 - }, - { - "epoch": 0.22869923442776569, - "grad_norm": 8.69192123413086, - "learning_rate": 4.618834609287057e-05, - "loss": 0.73, - "step": 25870 - }, - { - "epoch": 0.22878763768807794, - "grad_norm": 4.735687255859375, - "learning_rate": 4.61868727051987e-05, - "loss": 0.7655, - "step": 25880 - }, - { - "epoch": 0.22887604094839017, - "grad_norm": 7.194514751434326, - "learning_rate": 4.618539931752683e-05, - "loss": 0.7434, - "step": 25890 - }, - { - "epoch": 0.2289644442087024, - "grad_norm": 3.0040442943573, - "learning_rate": 4.618392592985496e-05, - "loss": 0.7199, - "step": 25900 - }, - { - "epoch": 0.22905284746901466, - "grad_norm": 8.181593894958496, - "learning_rate": 4.618245254218309e-05, - "loss": 0.6733, - "step": 25910 - }, - { - "epoch": 0.2291412507293269, - "grad_norm": 4.748161315917969, - "learning_rate": 4.6180979154511223e-05, - "loss": 0.7654, - "step": 25920 - }, - { - "epoch": 0.22922965398963913, - "grad_norm": 5.968911170959473, - "learning_rate": 4.6179505766839345e-05, - "loss": 0.8292, - "step": 25930 - }, - { - "epoch": 0.22931805724995138, - "grad_norm": 1.8074452877044678, - "learning_rate": 4.617803237916748e-05, - "loss": 0.7501, - "step": 25940 - }, - { - "epoch": 0.22940646051026362, - "grad_norm": 7.659058570861816, - "learning_rate": 4.617655899149561e-05, - "loss": 0.7532, - "step": 25950 - }, - { - "epoch": 0.22949486377057585, - "grad_norm": 6.085169315338135, - "learning_rate": 4.617508560382374e-05, - "loss": 0.7895, - "step": 25960 - }, - { - "epoch": 0.2295832670308881, - "grad_norm": 19.16455841064453, - "learning_rate": 4.6173612216151865e-05, - "loss": 0.7464, - "step": 25970 - }, - { - "epoch": 0.22967167029120034, - "grad_norm": 6.268245697021484, - "learning_rate": 4.6172138828479994e-05, - "loss": 0.7581, - "step": 25980 - }, - { - "epoch": 0.22976007355151257, - "grad_norm": 3.595996141433716, - "learning_rate": 4.617066544080812e-05, - "loss": 0.7438, - "step": 25990 - }, - { - "epoch": 0.22984847681182483, - "grad_norm": 5.319154739379883, - "learning_rate": 4.616919205313626e-05, - "loss": 0.8218, - "step": 26000 - }, - { - "epoch": 0.22993688007213706, - "grad_norm": 5.387119770050049, - "learning_rate": 4.6167718665464385e-05, - "loss": 0.8435, - "step": 26010 - }, - { - "epoch": 0.2300252833324493, - "grad_norm": 2.5516552925109863, - "learning_rate": 4.6166245277792514e-05, - "loss": 0.8539, - "step": 26020 - }, - { - "epoch": 0.23011368659276155, - "grad_norm": 4.0663862228393555, - "learning_rate": 4.616477189012064e-05, - "loss": 0.7074, - "step": 26030 - }, - { - "epoch": 0.23020208985307378, - "grad_norm": 1.879555344581604, - "learning_rate": 4.616329850244877e-05, - "loss": 0.9003, - "step": 26040 - }, - { - "epoch": 0.230290493113386, - "grad_norm": 4.159968376159668, - "learning_rate": 4.61618251147769e-05, - "loss": 0.7554, - "step": 26050 - }, - { - "epoch": 0.23037889637369827, - "grad_norm": 8.559286117553711, - "learning_rate": 4.6160351727105034e-05, - "loss": 0.6224, - "step": 26060 - }, - { - "epoch": 0.2304672996340105, - "grad_norm": 6.309031009674072, - "learning_rate": 4.615887833943316e-05, - "loss": 0.7677, - "step": 26070 - }, - { - "epoch": 0.23055570289432276, - "grad_norm": 8.937350273132324, - "learning_rate": 4.615740495176129e-05, - "loss": 0.7323, - "step": 26080 - }, - { - "epoch": 0.230644106154635, - "grad_norm": 2.526836633682251, - "learning_rate": 4.615593156408942e-05, - "loss": 0.7272, - "step": 26090 - }, - { - "epoch": 0.23073250941494722, - "grad_norm": 2.119723081588745, - "learning_rate": 4.615445817641755e-05, - "loss": 0.7469, - "step": 26100 - }, - { - "epoch": 0.23082091267525948, - "grad_norm": 4.8421502113342285, - "learning_rate": 4.6152984788745676e-05, - "loss": 0.9137, - "step": 26110 - }, - { - "epoch": 0.2309093159355717, - "grad_norm": 4.467935562133789, - "learning_rate": 4.6151511401073804e-05, - "loss": 0.8161, - "step": 26120 - }, - { - "epoch": 0.23099771919588394, - "grad_norm": 4.147600173950195, - "learning_rate": 4.615003801340194e-05, - "loss": 0.9402, - "step": 26130 - }, - { - "epoch": 0.2310861224561962, - "grad_norm": 3.4456043243408203, - "learning_rate": 4.614856462573007e-05, - "loss": 0.7644, - "step": 26140 - }, - { - "epoch": 0.23117452571650843, - "grad_norm": 3.3381314277648926, - "learning_rate": 4.6147091238058196e-05, - "loss": 0.7687, - "step": 26150 - }, - { - "epoch": 0.23126292897682066, - "grad_norm": 5.1820549964904785, - "learning_rate": 4.6145617850386324e-05, - "loss": 0.858, - "step": 26160 - }, - { - "epoch": 0.23135133223713292, - "grad_norm": 2.9732067584991455, - "learning_rate": 4.614414446271445e-05, - "loss": 0.762, - "step": 26170 - }, - { - "epoch": 0.23143973549744515, - "grad_norm": 4.63769006729126, - "learning_rate": 4.614267107504258e-05, - "loss": 0.672, - "step": 26180 - }, - { - "epoch": 0.23152813875775738, - "grad_norm": 5.112890720367432, - "learning_rate": 4.6141197687370716e-05, - "loss": 0.629, - "step": 26190 - }, - { - "epoch": 0.23161654201806964, - "grad_norm": 3.3782641887664795, - "learning_rate": 4.6139724299698844e-05, - "loss": 0.6637, - "step": 26200 - }, - { - "epoch": 0.23170494527838187, - "grad_norm": 2.266312599182129, - "learning_rate": 4.613825091202697e-05, - "loss": 0.9511, - "step": 26210 - }, - { - "epoch": 0.2317933485386941, - "grad_norm": 8.544103622436523, - "learning_rate": 4.61367775243551e-05, - "loss": 0.8023, - "step": 26220 - }, - { - "epoch": 0.23188175179900636, - "grad_norm": 3.6856672763824463, - "learning_rate": 4.613530413668323e-05, - "loss": 0.7015, - "step": 26230 - }, - { - "epoch": 0.2319701550593186, - "grad_norm": 17.594270706176758, - "learning_rate": 4.613383074901136e-05, - "loss": 0.7831, - "step": 26240 - }, - { - "epoch": 0.23205855831963082, - "grad_norm": 4.329131603240967, - "learning_rate": 4.613235736133949e-05, - "loss": 0.787, - "step": 26250 - }, - { - "epoch": 0.23214696157994308, - "grad_norm": 7.807017803192139, - "learning_rate": 4.6130883973667615e-05, - "loss": 0.7351, - "step": 26260 - }, - { - "epoch": 0.2322353648402553, - "grad_norm": 5.564336776733398, - "learning_rate": 4.612941058599575e-05, - "loss": 0.8509, - "step": 26270 - }, - { - "epoch": 0.23232376810056754, - "grad_norm": 4.030319690704346, - "learning_rate": 4.612793719832388e-05, - "loss": 0.7824, - "step": 26280 - }, - { - "epoch": 0.2324121713608798, - "grad_norm": 3.7925074100494385, - "learning_rate": 4.6126463810652006e-05, - "loss": 0.741, - "step": 26290 - }, - { - "epoch": 0.23250057462119203, - "grad_norm": 5.255957126617432, - "learning_rate": 4.6124990422980135e-05, - "loss": 0.8586, - "step": 26300 - }, - { - "epoch": 0.23258897788150426, - "grad_norm": 2.7999472618103027, - "learning_rate": 4.612351703530827e-05, - "loss": 0.7317, - "step": 26310 - }, - { - "epoch": 0.23267738114181652, - "grad_norm": 7.2217254638671875, - "learning_rate": 4.612204364763639e-05, - "loss": 0.8289, - "step": 26320 - }, - { - "epoch": 0.23276578440212875, - "grad_norm": 2.340519666671753, - "learning_rate": 4.6120570259964527e-05, - "loss": 0.7462, - "step": 26330 - }, - { - "epoch": 0.23285418766244098, - "grad_norm": 11.22556209564209, - "learning_rate": 4.611909687229265e-05, - "loss": 0.7649, - "step": 26340 - }, - { - "epoch": 0.23294259092275324, - "grad_norm": 2.4115490913391113, - "learning_rate": 4.611762348462078e-05, - "loss": 0.8036, - "step": 26350 - }, - { - "epoch": 0.23303099418306547, - "grad_norm": 5.332367897033691, - "learning_rate": 4.611615009694891e-05, - "loss": 0.8238, - "step": 26360 - }, - { - "epoch": 0.2331193974433777, - "grad_norm": 3.896395683288574, - "learning_rate": 4.611467670927704e-05, - "loss": 0.6377, - "step": 26370 - }, - { - "epoch": 0.23320780070368996, - "grad_norm": 3.6120071411132812, - "learning_rate": 4.611320332160517e-05, - "loss": 0.6768, - "step": 26380 - }, - { - "epoch": 0.2332962039640022, - "grad_norm": 4.14366340637207, - "learning_rate": 4.6111729933933303e-05, - "loss": 0.8205, - "step": 26390 - }, - { - "epoch": 0.23338460722431442, - "grad_norm": 4.731985569000244, - "learning_rate": 4.6110256546261425e-05, - "loss": 0.7406, - "step": 26400 - }, - { - "epoch": 0.23347301048462668, - "grad_norm": 5.177968502044678, - "learning_rate": 4.610878315858956e-05, - "loss": 0.8204, - "step": 26410 - }, - { - "epoch": 0.2335614137449389, - "grad_norm": 1.977300763130188, - "learning_rate": 4.610730977091769e-05, - "loss": 0.8042, - "step": 26420 - }, - { - "epoch": 0.23364981700525114, - "grad_norm": 5.712028503417969, - "learning_rate": 4.610583638324582e-05, - "loss": 0.8159, - "step": 26430 - }, - { - "epoch": 0.2337382202655634, - "grad_norm": 21.36813735961914, - "learning_rate": 4.6104362995573945e-05, - "loss": 0.8236, - "step": 26440 - }, - { - "epoch": 0.23382662352587563, - "grad_norm": 9.561662673950195, - "learning_rate": 4.6102889607902074e-05, - "loss": 0.6932, - "step": 26450 - }, - { - "epoch": 0.23391502678618786, - "grad_norm": 2.5096518993377686, - "learning_rate": 4.61014162202302e-05, - "loss": 0.7873, - "step": 26460 - }, - { - "epoch": 0.23400343004650012, - "grad_norm": 14.823768615722656, - "learning_rate": 4.609994283255834e-05, - "loss": 0.8304, - "step": 26470 - }, - { - "epoch": 0.23409183330681235, - "grad_norm": 9.787788391113281, - "learning_rate": 4.609846944488646e-05, - "loss": 0.7191, - "step": 26480 - }, - { - "epoch": 0.23418023656712458, - "grad_norm": 7.436131477355957, - "learning_rate": 4.6096996057214594e-05, - "loss": 0.9062, - "step": 26490 - }, - { - "epoch": 0.23426863982743684, - "grad_norm": 5.646495342254639, - "learning_rate": 4.609552266954272e-05, - "loss": 0.9358, - "step": 26500 - }, - { - "epoch": 0.23435704308774907, - "grad_norm": 11.70395565032959, - "learning_rate": 4.609404928187085e-05, - "loss": 0.8903, - "step": 26510 - }, - { - "epoch": 0.2344454463480613, - "grad_norm": 3.845158100128174, - "learning_rate": 4.609257589419898e-05, - "loss": 0.8289, - "step": 26520 - }, - { - "epoch": 0.23453384960837356, - "grad_norm": 6.0214948654174805, - "learning_rate": 4.6091102506527114e-05, - "loss": 0.7012, - "step": 26530 - }, - { - "epoch": 0.2346222528686858, - "grad_norm": 7.993401050567627, - "learning_rate": 4.6089629118855236e-05, - "loss": 0.8474, - "step": 26540 - }, - { - "epoch": 0.23471065612899802, - "grad_norm": 8.60523509979248, - "learning_rate": 4.608815573118337e-05, - "loss": 0.781, - "step": 26550 - }, - { - "epoch": 0.23479905938931028, - "grad_norm": 3.2778806686401367, - "learning_rate": 4.60866823435115e-05, - "loss": 0.762, - "step": 26560 - }, - { - "epoch": 0.2348874626496225, - "grad_norm": 3.393656015396118, - "learning_rate": 4.608520895583963e-05, - "loss": 0.7628, - "step": 26570 - }, - { - "epoch": 0.23497586590993474, - "grad_norm": 2.1216790676116943, - "learning_rate": 4.6083735568167756e-05, - "loss": 0.7846, - "step": 26580 - }, - { - "epoch": 0.235064269170247, - "grad_norm": 4.257018089294434, - "learning_rate": 4.6082262180495884e-05, - "loss": 0.7494, - "step": 26590 - }, - { - "epoch": 0.23515267243055923, - "grad_norm": 4.937124252319336, - "learning_rate": 4.608078879282401e-05, - "loss": 0.7143, - "step": 26600 - }, - { - "epoch": 0.2352410756908715, - "grad_norm": 1.3708324432373047, - "learning_rate": 4.607931540515215e-05, - "loss": 0.7661, - "step": 26610 - }, - { - "epoch": 0.23532947895118372, - "grad_norm": 6.229585647583008, - "learning_rate": 4.607784201748027e-05, - "loss": 0.839, - "step": 26620 - }, - { - "epoch": 0.23541788221149595, - "grad_norm": 4.585300922393799, - "learning_rate": 4.6076368629808404e-05, - "loss": 0.7521, - "step": 26630 - }, - { - "epoch": 0.2355062854718082, - "grad_norm": 7.729150295257568, - "learning_rate": 4.607489524213653e-05, - "loss": 0.638, - "step": 26640 - }, - { - "epoch": 0.23559468873212044, - "grad_norm": 1.7038516998291016, - "learning_rate": 4.607342185446466e-05, - "loss": 0.7783, - "step": 26650 - }, - { - "epoch": 0.23568309199243268, - "grad_norm": 2.533660411834717, - "learning_rate": 4.607194846679279e-05, - "loss": 0.7886, - "step": 26660 - }, - { - "epoch": 0.23577149525274493, - "grad_norm": 5.983635425567627, - "learning_rate": 4.6070475079120924e-05, - "loss": 0.8338, - "step": 26670 - }, - { - "epoch": 0.23585989851305716, - "grad_norm": 1.9739922285079956, - "learning_rate": 4.6069001691449046e-05, - "loss": 0.6989, - "step": 26680 - }, - { - "epoch": 0.2359483017733694, - "grad_norm": 3.406186819076538, - "learning_rate": 4.606752830377718e-05, - "loss": 0.7917, - "step": 26690 - }, - { - "epoch": 0.23603670503368165, - "grad_norm": 5.995182514190674, - "learning_rate": 4.60660549161053e-05, - "loss": 0.7603, - "step": 26700 - }, - { - "epoch": 0.23612510829399389, - "grad_norm": 6.2465009689331055, - "learning_rate": 4.606458152843344e-05, - "loss": 0.7102, - "step": 26710 - }, - { - "epoch": 0.23621351155430612, - "grad_norm": 4.614730358123779, - "learning_rate": 4.6063108140761566e-05, - "loss": 0.8176, - "step": 26720 - }, - { - "epoch": 0.23630191481461837, - "grad_norm": 1.6852394342422485, - "learning_rate": 4.6061634753089695e-05, - "loss": 0.8228, - "step": 26730 - }, - { - "epoch": 0.2363903180749306, - "grad_norm": 2.212242841720581, - "learning_rate": 4.606016136541782e-05, - "loss": 0.6936, - "step": 26740 - }, - { - "epoch": 0.23647872133524284, - "grad_norm": 1.9207688570022583, - "learning_rate": 4.605868797774596e-05, - "loss": 0.7462, - "step": 26750 - }, - { - "epoch": 0.2365671245955551, - "grad_norm": 5.530776023864746, - "learning_rate": 4.605721459007408e-05, - "loss": 0.8081, - "step": 26760 - }, - { - "epoch": 0.23665552785586733, - "grad_norm": 6.019812107086182, - "learning_rate": 4.6055741202402215e-05, - "loss": 0.8332, - "step": 26770 - }, - { - "epoch": 0.23674393111617956, - "grad_norm": 4.056359767913818, - "learning_rate": 4.605426781473034e-05, - "loss": 0.7116, - "step": 26780 - }, - { - "epoch": 0.23683233437649182, - "grad_norm": 17.098737716674805, - "learning_rate": 4.605279442705847e-05, - "loss": 0.8057, - "step": 26790 - }, - { - "epoch": 0.23692073763680405, - "grad_norm": 3.866529703140259, - "learning_rate": 4.60513210393866e-05, - "loss": 0.7172, - "step": 26800 - }, - { - "epoch": 0.23700914089711628, - "grad_norm": 4.999325752258301, - "learning_rate": 4.604984765171473e-05, - "loss": 0.7121, - "step": 26810 - }, - { - "epoch": 0.23709754415742854, - "grad_norm": 4.941673278808594, - "learning_rate": 4.6048374264042857e-05, - "loss": 0.616, - "step": 26820 - }, - { - "epoch": 0.23718594741774077, - "grad_norm": 8.706204414367676, - "learning_rate": 4.604690087637099e-05, - "loss": 0.9083, - "step": 26830 - }, - { - "epoch": 0.237274350678053, - "grad_norm": 6.6517205238342285, - "learning_rate": 4.604542748869911e-05, - "loss": 0.6911, - "step": 26840 - }, - { - "epoch": 0.23736275393836526, - "grad_norm": 5.494711875915527, - "learning_rate": 4.604395410102725e-05, - "loss": 0.7714, - "step": 26850 - }, - { - "epoch": 0.2374511571986775, - "grad_norm": 5.264119625091553, - "learning_rate": 4.604248071335538e-05, - "loss": 0.6888, - "step": 26860 - }, - { - "epoch": 0.23753956045898972, - "grad_norm": 2.502356767654419, - "learning_rate": 4.6041007325683505e-05, - "loss": 0.9187, - "step": 26870 - }, - { - "epoch": 0.23762796371930198, - "grad_norm": 3.20857572555542, - "learning_rate": 4.6039533938011633e-05, - "loss": 0.8616, - "step": 26880 - }, - { - "epoch": 0.2377163669796142, - "grad_norm": 3.5631120204925537, - "learning_rate": 4.603806055033977e-05, - "loss": 0.7893, - "step": 26890 - }, - { - "epoch": 0.23780477023992644, - "grad_norm": 1.9661896228790283, - "learning_rate": 4.603658716266789e-05, - "loss": 0.7289, - "step": 26900 - }, - { - "epoch": 0.2378931735002387, - "grad_norm": 16.530513763427734, - "learning_rate": 4.6035113774996025e-05, - "loss": 0.7672, - "step": 26910 - }, - { - "epoch": 0.23798157676055093, - "grad_norm": 5.208324909210205, - "learning_rate": 4.6033640387324154e-05, - "loss": 0.5701, - "step": 26920 - }, - { - "epoch": 0.23806998002086316, - "grad_norm": 6.618350982666016, - "learning_rate": 4.603216699965228e-05, - "loss": 0.8072, - "step": 26930 - }, - { - "epoch": 0.23815838328117542, - "grad_norm": 5.809628009796143, - "learning_rate": 4.603069361198041e-05, - "loss": 0.6595, - "step": 26940 - }, - { - "epoch": 0.23824678654148765, - "grad_norm": 4.600014686584473, - "learning_rate": 4.602922022430854e-05, - "loss": 0.724, - "step": 26950 - }, - { - "epoch": 0.23833518980179988, - "grad_norm": 7.521449089050293, - "learning_rate": 4.602774683663667e-05, - "loss": 0.965, - "step": 26960 - }, - { - "epoch": 0.23842359306211214, - "grad_norm": 6.910586357116699, - "learning_rate": 4.60262734489648e-05, - "loss": 0.7996, - "step": 26970 - }, - { - "epoch": 0.23851199632242437, - "grad_norm": 2.6404194831848145, - "learning_rate": 4.602480006129293e-05, - "loss": 0.7884, - "step": 26980 - }, - { - "epoch": 0.2386003995827366, - "grad_norm": 3.6584150791168213, - "learning_rate": 4.602332667362106e-05, - "loss": 0.5845, - "step": 26990 - }, - { - "epoch": 0.23868880284304886, - "grad_norm": 3.9669134616851807, - "learning_rate": 4.602185328594919e-05, - "loss": 0.7716, - "step": 27000 - }, - { - "epoch": 0.2387772061033611, - "grad_norm": 3.2543506622314453, - "learning_rate": 4.6020379898277316e-05, - "loss": 0.732, - "step": 27010 - }, - { - "epoch": 0.23886560936367332, - "grad_norm": 6.32755708694458, - "learning_rate": 4.6018906510605444e-05, - "loss": 0.7717, - "step": 27020 - }, - { - "epoch": 0.23895401262398558, - "grad_norm": 3.7160096168518066, - "learning_rate": 4.601743312293358e-05, - "loss": 0.7618, - "step": 27030 - }, - { - "epoch": 0.2390424158842978, - "grad_norm": 4.574923515319824, - "learning_rate": 4.601595973526171e-05, - "loss": 0.7261, - "step": 27040 - }, - { - "epoch": 0.23913081914461004, - "grad_norm": 8.13255500793457, - "learning_rate": 4.6014486347589836e-05, - "loss": 0.7097, - "step": 27050 - }, - { - "epoch": 0.2392192224049223, - "grad_norm": 5.212277412414551, - "learning_rate": 4.6013012959917964e-05, - "loss": 0.7916, - "step": 27060 - }, - { - "epoch": 0.23930762566523453, - "grad_norm": 7.050914764404297, - "learning_rate": 4.601153957224609e-05, - "loss": 0.6378, - "step": 27070 - }, - { - "epoch": 0.23939602892554676, - "grad_norm": 2.6058921813964844, - "learning_rate": 4.601006618457422e-05, - "loss": 0.7586, - "step": 27080 - }, - { - "epoch": 0.23948443218585902, - "grad_norm": 2.754028081893921, - "learning_rate": 4.600859279690235e-05, - "loss": 0.8515, - "step": 27090 - }, - { - "epoch": 0.23957283544617125, - "grad_norm": 4.837860107421875, - "learning_rate": 4.6007119409230484e-05, - "loss": 0.8845, - "step": 27100 - }, - { - "epoch": 0.23966123870648348, - "grad_norm": 3.7284557819366455, - "learning_rate": 4.600564602155861e-05, - "loss": 0.9217, - "step": 27110 - }, - { - "epoch": 0.23974964196679574, - "grad_norm": 3.5191853046417236, - "learning_rate": 4.600417263388674e-05, - "loss": 0.7213, - "step": 27120 - }, - { - "epoch": 0.23983804522710797, - "grad_norm": 4.532087802886963, - "learning_rate": 4.600269924621487e-05, - "loss": 0.8679, - "step": 27130 - }, - { - "epoch": 0.23992644848742023, - "grad_norm": 4.2826337814331055, - "learning_rate": 4.6001225858543e-05, - "loss": 0.6832, - "step": 27140 - }, - { - "epoch": 0.24001485174773246, - "grad_norm": 2.4995365142822266, - "learning_rate": 4.5999752470871126e-05, - "loss": 0.583, - "step": 27150 - }, - { - "epoch": 0.2401032550080447, - "grad_norm": 3.484452247619629, - "learning_rate": 4.599827908319926e-05, - "loss": 0.7244, - "step": 27160 - }, - { - "epoch": 0.24019165826835695, - "grad_norm": 5.703882217407227, - "learning_rate": 4.599680569552738e-05, - "loss": 0.7813, - "step": 27170 - }, - { - "epoch": 0.24028006152866918, - "grad_norm": 8.513440132141113, - "learning_rate": 4.599533230785552e-05, - "loss": 0.8008, - "step": 27180 - }, - { - "epoch": 0.2403684647889814, - "grad_norm": 3.886124610900879, - "learning_rate": 4.5993858920183646e-05, - "loss": 0.8106, - "step": 27190 - }, - { - "epoch": 0.24045686804929367, - "grad_norm": 2.080432415008545, - "learning_rate": 4.5992385532511775e-05, - "loss": 0.775, - "step": 27200 - }, - { - "epoch": 0.2405452713096059, - "grad_norm": 4.064215660095215, - "learning_rate": 4.59909121448399e-05, - "loss": 0.7833, - "step": 27210 - }, - { - "epoch": 0.24063367456991813, - "grad_norm": 1.8495577573776245, - "learning_rate": 4.598943875716804e-05, - "loss": 0.7723, - "step": 27220 - }, - { - "epoch": 0.2407220778302304, - "grad_norm": 1.6558035612106323, - "learning_rate": 4.598796536949616e-05, - "loss": 0.8189, - "step": 27230 - }, - { - "epoch": 0.24081048109054262, - "grad_norm": 4.51279878616333, - "learning_rate": 4.5986491981824295e-05, - "loss": 0.7964, - "step": 27240 - }, - { - "epoch": 0.24089888435085485, - "grad_norm": 9.106587409973145, - "learning_rate": 4.598501859415242e-05, - "loss": 0.8, - "step": 27250 - }, - { - "epoch": 0.2409872876111671, - "grad_norm": 3.420579671859741, - "learning_rate": 4.598354520648055e-05, - "loss": 0.7039, - "step": 27260 - }, - { - "epoch": 0.24107569087147934, - "grad_norm": 9.412304878234863, - "learning_rate": 4.598207181880868e-05, - "loss": 0.8447, - "step": 27270 - }, - { - "epoch": 0.24116409413179157, - "grad_norm": 1.5723434686660767, - "learning_rate": 4.598059843113681e-05, - "loss": 0.7944, - "step": 27280 - }, - { - "epoch": 0.24125249739210383, - "grad_norm": 2.9342610836029053, - "learning_rate": 4.597912504346494e-05, - "loss": 0.8458, - "step": 27290 - }, - { - "epoch": 0.24134090065241606, - "grad_norm": 11.885449409484863, - "learning_rate": 4.597765165579307e-05, - "loss": 0.7906, - "step": 27300 - }, - { - "epoch": 0.2414293039127283, - "grad_norm": 3.185608148574829, - "learning_rate": 4.597617826812119e-05, - "loss": 0.9034, - "step": 27310 - }, - { - "epoch": 0.24151770717304055, - "grad_norm": 5.354928970336914, - "learning_rate": 4.597470488044933e-05, - "loss": 0.8559, - "step": 27320 - }, - { - "epoch": 0.24160611043335278, - "grad_norm": 3.766575336456299, - "learning_rate": 4.597323149277746e-05, - "loss": 0.7863, - "step": 27330 - }, - { - "epoch": 0.24169451369366501, - "grad_norm": 3.2981998920440674, - "learning_rate": 4.5971758105105585e-05, - "loss": 0.8559, - "step": 27340 - }, - { - "epoch": 0.24178291695397727, - "grad_norm": 5.427745819091797, - "learning_rate": 4.5970284717433714e-05, - "loss": 0.9341, - "step": 27350 - }, - { - "epoch": 0.2418713202142895, - "grad_norm": 8.101058959960938, - "learning_rate": 4.596881132976185e-05, - "loss": 0.7719, - "step": 27360 - }, - { - "epoch": 0.24195972347460173, - "grad_norm": 2.6489789485931396, - "learning_rate": 4.596733794208997e-05, - "loss": 0.6542, - "step": 27370 - }, - { - "epoch": 0.242048126734914, - "grad_norm": 5.422354221343994, - "learning_rate": 4.5965864554418105e-05, - "loss": 0.889, - "step": 27380 - }, - { - "epoch": 0.24213652999522622, - "grad_norm": 3.1926510334014893, - "learning_rate": 4.5964391166746234e-05, - "loss": 0.8579, - "step": 27390 - }, - { - "epoch": 0.24222493325553845, - "grad_norm": 7.074790000915527, - "learning_rate": 4.596291777907436e-05, - "loss": 0.7468, - "step": 27400 - }, - { - "epoch": 0.2423133365158507, - "grad_norm": 3.0544276237487793, - "learning_rate": 4.596144439140249e-05, - "loss": 0.6276, - "step": 27410 - }, - { - "epoch": 0.24240173977616294, - "grad_norm": 2.9188649654388428, - "learning_rate": 4.595997100373062e-05, - "loss": 0.8359, - "step": 27420 - }, - { - "epoch": 0.24249014303647518, - "grad_norm": 8.019341468811035, - "learning_rate": 4.595849761605875e-05, - "loss": 0.7151, - "step": 27430 - }, - { - "epoch": 0.24257854629678743, - "grad_norm": 3.7809083461761475, - "learning_rate": 4.595702422838688e-05, - "loss": 0.7658, - "step": 27440 - }, - { - "epoch": 0.24266694955709966, - "grad_norm": 2.2371914386749268, - "learning_rate": 4.5955550840715004e-05, - "loss": 0.6999, - "step": 27450 - }, - { - "epoch": 0.2427553528174119, - "grad_norm": 2.8070926666259766, - "learning_rate": 4.595407745304314e-05, - "loss": 0.7651, - "step": 27460 - }, - { - "epoch": 0.24284375607772415, - "grad_norm": 1.8410590887069702, - "learning_rate": 4.595260406537127e-05, - "loss": 0.6764, - "step": 27470 - }, - { - "epoch": 0.24293215933803639, - "grad_norm": 1.984129786491394, - "learning_rate": 4.5951130677699396e-05, - "loss": 0.6803, - "step": 27480 - }, - { - "epoch": 0.24302056259834862, - "grad_norm": 4.3879313468933105, - "learning_rate": 4.5949657290027524e-05, - "loss": 0.8164, - "step": 27490 - }, - { - "epoch": 0.24310896585866087, - "grad_norm": 2.5979106426239014, - "learning_rate": 4.594818390235566e-05, - "loss": 0.7351, - "step": 27500 - }, - { - "epoch": 0.2431973691189731, - "grad_norm": 8.960977554321289, - "learning_rate": 4.594671051468378e-05, - "loss": 0.8408, - "step": 27510 - }, - { - "epoch": 0.24328577237928534, - "grad_norm": 5.437119960784912, - "learning_rate": 4.5945237127011916e-05, - "loss": 0.7397, - "step": 27520 - }, - { - "epoch": 0.2433741756395976, - "grad_norm": 4.247231960296631, - "learning_rate": 4.594376373934004e-05, - "loss": 0.7607, - "step": 27530 - }, - { - "epoch": 0.24346257889990983, - "grad_norm": 7.173487663269043, - "learning_rate": 4.594229035166817e-05, - "loss": 0.7898, - "step": 27540 - }, - { - "epoch": 0.24355098216022206, - "grad_norm": 4.7880682945251465, - "learning_rate": 4.59408169639963e-05, - "loss": 0.7287, - "step": 27550 - }, - { - "epoch": 0.24363938542053432, - "grad_norm": 2.163510322570801, - "learning_rate": 4.593934357632443e-05, - "loss": 0.8242, - "step": 27560 - }, - { - "epoch": 0.24372778868084655, - "grad_norm": 2.545175313949585, - "learning_rate": 4.593787018865256e-05, - "loss": 0.7271, - "step": 27570 - }, - { - "epoch": 0.24381619194115878, - "grad_norm": 7.023959636688232, - "learning_rate": 4.593639680098069e-05, - "loss": 0.755, - "step": 27580 - }, - { - "epoch": 0.24390459520147104, - "grad_norm": 5.333517551422119, - "learning_rate": 4.5934923413308814e-05, - "loss": 0.8837, - "step": 27590 - }, - { - "epoch": 0.24399299846178327, - "grad_norm": 4.334492206573486, - "learning_rate": 4.593345002563695e-05, - "loss": 0.8622, - "step": 27600 - }, - { - "epoch": 0.2440814017220955, - "grad_norm": 5.533698558807373, - "learning_rate": 4.593197663796508e-05, - "loss": 0.8461, - "step": 27610 - }, - { - "epoch": 0.24416980498240776, - "grad_norm": 12.364745140075684, - "learning_rate": 4.5930503250293206e-05, - "loss": 0.7126, - "step": 27620 - }, - { - "epoch": 0.24425820824272, - "grad_norm": 7.118648052215576, - "learning_rate": 4.5929029862621335e-05, - "loss": 0.7628, - "step": 27630 - }, - { - "epoch": 0.24434661150303222, - "grad_norm": 3.5467753410339355, - "learning_rate": 4.592755647494946e-05, - "loss": 0.7172, - "step": 27640 - }, - { - "epoch": 0.24443501476334448, - "grad_norm": 3.2943429946899414, - "learning_rate": 4.592608308727759e-05, - "loss": 0.6839, - "step": 27650 - }, - { - "epoch": 0.2445234180236567, - "grad_norm": 5.245155334472656, - "learning_rate": 4.5924609699605726e-05, - "loss": 0.7213, - "step": 27660 - }, - { - "epoch": 0.24461182128396897, - "grad_norm": 4.831164360046387, - "learning_rate": 4.592313631193385e-05, - "loss": 0.8247, - "step": 27670 - }, - { - "epoch": 0.2447002245442812, - "grad_norm": 4.629475116729736, - "learning_rate": 4.592166292426198e-05, - "loss": 0.7381, - "step": 27680 - }, - { - "epoch": 0.24478862780459343, - "grad_norm": 6.993200302124023, - "learning_rate": 4.592018953659011e-05, - "loss": 0.6655, - "step": 27690 - }, - { - "epoch": 0.2448770310649057, - "grad_norm": 3.4463162422180176, - "learning_rate": 4.591871614891824e-05, - "loss": 0.8809, - "step": 27700 - }, - { - "epoch": 0.24496543432521792, - "grad_norm": 3.353153705596924, - "learning_rate": 4.591724276124637e-05, - "loss": 0.7817, - "step": 27710 - }, - { - "epoch": 0.24505383758553015, - "grad_norm": 8.690110206604004, - "learning_rate": 4.59157693735745e-05, - "loss": 0.8445, - "step": 27720 - }, - { - "epoch": 0.2451422408458424, - "grad_norm": 5.797652721405029, - "learning_rate": 4.5914295985902625e-05, - "loss": 0.6521, - "step": 27730 - }, - { - "epoch": 0.24523064410615464, - "grad_norm": 7.199275016784668, - "learning_rate": 4.591282259823076e-05, - "loss": 0.7692, - "step": 27740 - }, - { - "epoch": 0.24531904736646687, - "grad_norm": 7.3925886154174805, - "learning_rate": 4.591134921055888e-05, - "loss": 0.8059, - "step": 27750 - }, - { - "epoch": 0.24540745062677913, - "grad_norm": 4.901026725769043, - "learning_rate": 4.590987582288702e-05, - "loss": 0.82, - "step": 27760 - }, - { - "epoch": 0.24549585388709136, - "grad_norm": 5.541647434234619, - "learning_rate": 4.5908402435215145e-05, - "loss": 0.7396, - "step": 27770 - }, - { - "epoch": 0.2455842571474036, - "grad_norm": 3.0398240089416504, - "learning_rate": 4.5906929047543273e-05, - "loss": 0.715, - "step": 27780 - }, - { - "epoch": 0.24567266040771585, - "grad_norm": 6.12975549697876, - "learning_rate": 4.59054556598714e-05, - "loss": 0.7317, - "step": 27790 - }, - { - "epoch": 0.24576106366802808, - "grad_norm": 5.894838809967041, - "learning_rate": 4.590398227219954e-05, - "loss": 0.9235, - "step": 27800 - }, - { - "epoch": 0.2458494669283403, - "grad_norm": 4.738114833831787, - "learning_rate": 4.590250888452766e-05, - "loss": 0.8374, - "step": 27810 - }, - { - "epoch": 0.24593787018865257, - "grad_norm": 3.513248920440674, - "learning_rate": 4.5901035496855794e-05, - "loss": 0.7429, - "step": 27820 - }, - { - "epoch": 0.2460262734489648, - "grad_norm": 7.718358039855957, - "learning_rate": 4.589956210918392e-05, - "loss": 0.7613, - "step": 27830 - }, - { - "epoch": 0.24611467670927703, - "grad_norm": 2.4155099391937256, - "learning_rate": 4.589808872151205e-05, - "loss": 0.8053, - "step": 27840 - }, - { - "epoch": 0.2462030799695893, - "grad_norm": 2.726713180541992, - "learning_rate": 4.589661533384018e-05, - "loss": 0.7494, - "step": 27850 - }, - { - "epoch": 0.24629148322990152, - "grad_norm": 4.137597560882568, - "learning_rate": 4.5895141946168314e-05, - "loss": 0.7612, - "step": 27860 - }, - { - "epoch": 0.24637988649021375, - "grad_norm": 4.160385608673096, - "learning_rate": 4.5893668558496435e-05, - "loss": 0.7191, - "step": 27870 - }, - { - "epoch": 0.246468289750526, - "grad_norm": 4.072481155395508, - "learning_rate": 4.589219517082457e-05, - "loss": 0.8007, - "step": 27880 - }, - { - "epoch": 0.24655669301083824, - "grad_norm": 7.5308637619018555, - "learning_rate": 4.58907217831527e-05, - "loss": 0.8299, - "step": 27890 - }, - { - "epoch": 0.24664509627115047, - "grad_norm": 6.215305328369141, - "learning_rate": 4.588924839548083e-05, - "loss": 0.7622, - "step": 27900 - }, - { - "epoch": 0.24673349953146273, - "grad_norm": 3.666205406188965, - "learning_rate": 4.5887775007808956e-05, - "loss": 0.6522, - "step": 27910 - }, - { - "epoch": 0.24682190279177496, - "grad_norm": 4.303045749664307, - "learning_rate": 4.5886301620137084e-05, - "loss": 0.8075, - "step": 27920 - }, - { - "epoch": 0.2469103060520872, - "grad_norm": 3.4121458530426025, - "learning_rate": 4.588482823246521e-05, - "loss": 0.7274, - "step": 27930 - }, - { - "epoch": 0.24699870931239945, - "grad_norm": 7.0728936195373535, - "learning_rate": 4.588335484479335e-05, - "loss": 0.9065, - "step": 27940 - }, - { - "epoch": 0.24708711257271168, - "grad_norm": 2.6692259311676025, - "learning_rate": 4.5881881457121476e-05, - "loss": 0.7414, - "step": 27950 - }, - { - "epoch": 0.2471755158330239, - "grad_norm": 4.277823448181152, - "learning_rate": 4.5880408069449604e-05, - "loss": 0.7551, - "step": 27960 - }, - { - "epoch": 0.24726391909333617, - "grad_norm": 6.4987921714782715, - "learning_rate": 4.587893468177773e-05, - "loss": 0.8438, - "step": 27970 - }, - { - "epoch": 0.2473523223536484, - "grad_norm": 3.7967724800109863, - "learning_rate": 4.587746129410586e-05, - "loss": 0.8001, - "step": 27980 - }, - { - "epoch": 0.24744072561396063, - "grad_norm": 8.63526725769043, - "learning_rate": 4.5875987906433996e-05, - "loss": 0.8722, - "step": 27990 - }, - { - "epoch": 0.2475291288742729, - "grad_norm": 4.242366313934326, - "learning_rate": 4.587451451876212e-05, - "loss": 0.7051, - "step": 28000 - }, - { - "epoch": 0.24761753213458512, - "grad_norm": 10.421416282653809, - "learning_rate": 4.587304113109025e-05, - "loss": 0.7388, - "step": 28010 - }, - { - "epoch": 0.24770593539489735, - "grad_norm": 2.73309588432312, - "learning_rate": 4.587156774341838e-05, - "loss": 0.6632, - "step": 28020 - }, - { - "epoch": 0.2477943386552096, - "grad_norm": 6.762021064758301, - "learning_rate": 4.587009435574651e-05, - "loss": 0.7726, - "step": 28030 - }, - { - "epoch": 0.24788274191552184, - "grad_norm": 2.386453866958618, - "learning_rate": 4.586862096807464e-05, - "loss": 0.8197, - "step": 28040 - }, - { - "epoch": 0.24797114517583407, - "grad_norm": 2.4636244773864746, - "learning_rate": 4.586714758040277e-05, - "loss": 0.8321, - "step": 28050 - }, - { - "epoch": 0.24805954843614633, - "grad_norm": 5.344876766204834, - "learning_rate": 4.5865674192730894e-05, - "loss": 0.9034, - "step": 28060 - }, - { - "epoch": 0.24814795169645856, - "grad_norm": 2.711806297302246, - "learning_rate": 4.586420080505903e-05, - "loss": 0.7574, - "step": 28070 - }, - { - "epoch": 0.2482363549567708, - "grad_norm": 3.473045825958252, - "learning_rate": 4.586272741738716e-05, - "loss": 0.7393, - "step": 28080 - }, - { - "epoch": 0.24832475821708305, - "grad_norm": 9.402650833129883, - "learning_rate": 4.5861254029715286e-05, - "loss": 0.7757, - "step": 28090 - }, - { - "epoch": 0.24841316147739528, - "grad_norm": 2.2732865810394287, - "learning_rate": 4.5859780642043415e-05, - "loss": 0.7385, - "step": 28100 - }, - { - "epoch": 0.24850156473770751, - "grad_norm": 2.804537534713745, - "learning_rate": 4.585830725437154e-05, - "loss": 0.7192, - "step": 28110 - }, - { - "epoch": 0.24858996799801977, - "grad_norm": 12.078184127807617, - "learning_rate": 4.585683386669967e-05, - "loss": 0.7096, - "step": 28120 - }, - { - "epoch": 0.248678371258332, - "grad_norm": 5.753732681274414, - "learning_rate": 4.5855360479027806e-05, - "loss": 0.7176, - "step": 28130 - }, - { - "epoch": 0.24876677451864423, - "grad_norm": 10.479565620422363, - "learning_rate": 4.585388709135593e-05, - "loss": 0.7143, - "step": 28140 - }, - { - "epoch": 0.2488551777789565, - "grad_norm": 3.086230993270874, - "learning_rate": 4.585241370368406e-05, - "loss": 0.9096, - "step": 28150 - }, - { - "epoch": 0.24894358103926872, - "grad_norm": 4.537883281707764, - "learning_rate": 4.585094031601219e-05, - "loss": 0.8254, - "step": 28160 - }, - { - "epoch": 0.24903198429958096, - "grad_norm": 5.582494258880615, - "learning_rate": 4.584946692834032e-05, - "loss": 0.8814, - "step": 28170 - }, - { - "epoch": 0.2491203875598932, - "grad_norm": 4.154595851898193, - "learning_rate": 4.584799354066845e-05, - "loss": 0.7157, - "step": 28180 - }, - { - "epoch": 0.24920879082020544, - "grad_norm": 5.340351581573486, - "learning_rate": 4.584652015299658e-05, - "loss": 0.7943, - "step": 28190 - }, - { - "epoch": 0.2492971940805177, - "grad_norm": 14.03711986541748, - "learning_rate": 4.5845046765324705e-05, - "loss": 0.8136, - "step": 28200 - }, - { - "epoch": 0.24938559734082993, - "grad_norm": 3.375035285949707, - "learning_rate": 4.584357337765284e-05, - "loss": 0.7918, - "step": 28210 - }, - { - "epoch": 0.24947400060114217, - "grad_norm": 5.254472255706787, - "learning_rate": 4.584209998998096e-05, - "loss": 0.6279, - "step": 28220 - }, - { - "epoch": 0.24956240386145442, - "grad_norm": 7.013589382171631, - "learning_rate": 4.58406266023091e-05, - "loss": 1.0012, - "step": 28230 - }, - { - "epoch": 0.24965080712176665, - "grad_norm": 15.84322452545166, - "learning_rate": 4.5839153214637225e-05, - "loss": 0.8033, - "step": 28240 - }, - { - "epoch": 0.24973921038207889, - "grad_norm": 4.328507423400879, - "learning_rate": 4.5837679826965353e-05, - "loss": 0.6547, - "step": 28250 - }, - { - "epoch": 0.24982761364239114, - "grad_norm": 3.8125171661376953, - "learning_rate": 4.583620643929348e-05, - "loss": 0.8319, - "step": 28260 - }, - { - "epoch": 0.24991601690270338, - "grad_norm": 1.9388508796691895, - "learning_rate": 4.583473305162162e-05, - "loss": 0.9568, - "step": 28270 - }, - { - "epoch": 0.2500044201630156, - "grad_norm": 6.131495475769043, - "learning_rate": 4.583325966394974e-05, - "loss": 0.8844, - "step": 28280 - }, - { - "epoch": 0.25009282342332784, - "grad_norm": 1.8883302211761475, - "learning_rate": 4.5831786276277874e-05, - "loss": 0.771, - "step": 28290 - }, - { - "epoch": 0.25018122668364007, - "grad_norm": 3.7101242542266846, - "learning_rate": 4.5830312888606e-05, - "loss": 0.6934, - "step": 28300 - }, - { - "epoch": 0.25026962994395235, - "grad_norm": 7.083991527557373, - "learning_rate": 4.582883950093413e-05, - "loss": 0.7457, - "step": 28310 - }, - { - "epoch": 0.2503580332042646, - "grad_norm": 6.361082077026367, - "learning_rate": 4.582736611326226e-05, - "loss": 0.6809, - "step": 28320 - }, - { - "epoch": 0.2504464364645768, - "grad_norm": 4.989359378814697, - "learning_rate": 4.5825892725590394e-05, - "loss": 0.8322, - "step": 28330 - }, - { - "epoch": 0.25053483972488905, - "grad_norm": 10.848586082458496, - "learning_rate": 4.5824419337918515e-05, - "loss": 0.7794, - "step": 28340 - }, - { - "epoch": 0.2506232429852013, - "grad_norm": 2.3265511989593506, - "learning_rate": 4.582294595024665e-05, - "loss": 0.8029, - "step": 28350 - }, - { - "epoch": 0.2507116462455135, - "grad_norm": 5.207944869995117, - "learning_rate": 4.582147256257477e-05, - "loss": 0.7481, - "step": 28360 - }, - { - "epoch": 0.2508000495058258, - "grad_norm": 11.006331443786621, - "learning_rate": 4.581999917490291e-05, - "loss": 0.866, - "step": 28370 - }, - { - "epoch": 0.250888452766138, - "grad_norm": 5.190815448760986, - "learning_rate": 4.5818525787231036e-05, - "loss": 0.7849, - "step": 28380 - }, - { - "epoch": 0.25097685602645026, - "grad_norm": 3.069892168045044, - "learning_rate": 4.5817052399559164e-05, - "loss": 0.7629, - "step": 28390 - }, - { - "epoch": 0.2510652592867625, - "grad_norm": 3.2880120277404785, - "learning_rate": 4.581557901188729e-05, - "loss": 0.7874, - "step": 28400 - }, - { - "epoch": 0.2511536625470747, - "grad_norm": 6.7434401512146, - "learning_rate": 4.581410562421543e-05, - "loss": 0.7109, - "step": 28410 - }, - { - "epoch": 0.25124206580738695, - "grad_norm": 4.0119524002075195, - "learning_rate": 4.581263223654355e-05, - "loss": 0.7303, - "step": 28420 - }, - { - "epoch": 0.25133046906769924, - "grad_norm": 8.777091979980469, - "learning_rate": 4.5811158848871684e-05, - "loss": 0.7814, - "step": 28430 - }, - { - "epoch": 0.25141887232801147, - "grad_norm": 6.044012069702148, - "learning_rate": 4.580968546119981e-05, - "loss": 0.8355, - "step": 28440 - }, - { - "epoch": 0.2515072755883237, - "grad_norm": 4.43526554107666, - "learning_rate": 4.580821207352794e-05, - "loss": 0.8782, - "step": 28450 - }, - { - "epoch": 0.25159567884863593, - "grad_norm": 6.353603363037109, - "learning_rate": 4.580673868585607e-05, - "loss": 0.822, - "step": 28460 - }, - { - "epoch": 0.25168408210894816, - "grad_norm": 7.672333717346191, - "learning_rate": 4.58052652981842e-05, - "loss": 0.7529, - "step": 28470 - }, - { - "epoch": 0.25177248536926045, - "grad_norm": 1.5266413688659668, - "learning_rate": 4.5803791910512326e-05, - "loss": 0.9385, - "step": 28480 - }, - { - "epoch": 0.2518608886295727, - "grad_norm": 2.4057815074920654, - "learning_rate": 4.580231852284046e-05, - "loss": 0.836, - "step": 28490 - }, - { - "epoch": 0.2519492918898849, - "grad_norm": 5.290966510772705, - "learning_rate": 4.580084513516858e-05, - "loss": 0.7352, - "step": 28500 - }, - { - "epoch": 0.25203769515019714, - "grad_norm": 5.318004131317139, - "learning_rate": 4.579937174749672e-05, - "loss": 0.7593, - "step": 28510 - }, - { - "epoch": 0.25212609841050937, - "grad_norm": 10.14842414855957, - "learning_rate": 4.5797898359824846e-05, - "loss": 0.7628, - "step": 28520 - }, - { - "epoch": 0.2522145016708216, - "grad_norm": 8.42928409576416, - "learning_rate": 4.5796424972152975e-05, - "loss": 0.6934, - "step": 28530 - }, - { - "epoch": 0.2523029049311339, - "grad_norm": 5.067338943481445, - "learning_rate": 4.57949515844811e-05, - "loss": 0.72, - "step": 28540 - }, - { - "epoch": 0.2523913081914461, - "grad_norm": 4.782569408416748, - "learning_rate": 4.579347819680924e-05, - "loss": 0.8456, - "step": 28550 - }, - { - "epoch": 0.25247971145175835, - "grad_norm": 2.653374195098877, - "learning_rate": 4.579200480913736e-05, - "loss": 0.8257, - "step": 28560 - }, - { - "epoch": 0.2525681147120706, - "grad_norm": 4.4330854415893555, - "learning_rate": 4.5790531421465495e-05, - "loss": 0.778, - "step": 28570 - }, - { - "epoch": 0.2526565179723828, - "grad_norm": 11.04740047454834, - "learning_rate": 4.5789058033793616e-05, - "loss": 0.7596, - "step": 28580 - }, - { - "epoch": 0.25274492123269504, - "grad_norm": 3.77433705329895, - "learning_rate": 4.578758464612175e-05, - "loss": 0.9691, - "step": 28590 - }, - { - "epoch": 0.2528333244930073, - "grad_norm": 6.705318927764893, - "learning_rate": 4.578611125844988e-05, - "loss": 0.8182, - "step": 28600 - }, - { - "epoch": 0.25292172775331956, - "grad_norm": 5.084736347198486, - "learning_rate": 4.578463787077801e-05, - "loss": 0.8034, - "step": 28610 - }, - { - "epoch": 0.2530101310136318, - "grad_norm": 5.857778549194336, - "learning_rate": 4.5783164483106136e-05, - "loss": 0.5936, - "step": 28620 - }, - { - "epoch": 0.253098534273944, - "grad_norm": 4.9835124015808105, - "learning_rate": 4.578169109543427e-05, - "loss": 0.7461, - "step": 28630 - }, - { - "epoch": 0.25318693753425625, - "grad_norm": 5.676827907562256, - "learning_rate": 4.578021770776239e-05, - "loss": 0.8185, - "step": 28640 - }, - { - "epoch": 0.2532753407945685, - "grad_norm": 4.298700332641602, - "learning_rate": 4.577874432009053e-05, - "loss": 0.8346, - "step": 28650 - }, - { - "epoch": 0.25336374405488077, - "grad_norm": 3.056025981903076, - "learning_rate": 4.577727093241866e-05, - "loss": 0.7172, - "step": 28660 - }, - { - "epoch": 0.253452147315193, - "grad_norm": 4.879296779632568, - "learning_rate": 4.5775797544746785e-05, - "loss": 0.7053, - "step": 28670 - }, - { - "epoch": 0.25354055057550523, - "grad_norm": 6.258749008178711, - "learning_rate": 4.577432415707491e-05, - "loss": 0.8798, - "step": 28680 - }, - { - "epoch": 0.25362895383581746, - "grad_norm": 3.5010361671447754, - "learning_rate": 4.577285076940304e-05, - "loss": 0.8394, - "step": 28690 - }, - { - "epoch": 0.2537173570961297, - "grad_norm": 5.559300899505615, - "learning_rate": 4.577137738173117e-05, - "loss": 0.7309, - "step": 28700 - }, - { - "epoch": 0.2538057603564419, - "grad_norm": 2.909944534301758, - "learning_rate": 4.5769903994059305e-05, - "loss": 0.6919, - "step": 28710 - }, - { - "epoch": 0.2538941636167542, - "grad_norm": 8.954793930053711, - "learning_rate": 4.5768430606387434e-05, - "loss": 0.8466, - "step": 28720 - }, - { - "epoch": 0.25398256687706644, - "grad_norm": 3.158184289932251, - "learning_rate": 4.576695721871556e-05, - "loss": 0.7055, - "step": 28730 - }, - { - "epoch": 0.25407097013737867, - "grad_norm": 1.5547363758087158, - "learning_rate": 4.576548383104369e-05, - "loss": 0.8503, - "step": 28740 - }, - { - "epoch": 0.2541593733976909, - "grad_norm": 11.266592025756836, - "learning_rate": 4.576401044337182e-05, - "loss": 0.6418, - "step": 28750 - }, - { - "epoch": 0.25424777665800313, - "grad_norm": 4.114709377288818, - "learning_rate": 4.576253705569995e-05, - "loss": 0.8064, - "step": 28760 - }, - { - "epoch": 0.25433617991831536, - "grad_norm": 1.7465564012527466, - "learning_rate": 4.576106366802808e-05, - "loss": 0.8276, - "step": 28770 - }, - { - "epoch": 0.25442458317862765, - "grad_norm": 5.222322463989258, - "learning_rate": 4.575959028035621e-05, - "loss": 0.7708, - "step": 28780 - }, - { - "epoch": 0.2545129864389399, - "grad_norm": 2.0481183528900146, - "learning_rate": 4.575811689268434e-05, - "loss": 0.7236, - "step": 28790 - }, - { - "epoch": 0.2546013896992521, - "grad_norm": 1.1635178327560425, - "learning_rate": 4.575664350501247e-05, - "loss": 0.6725, - "step": 28800 - }, - { - "epoch": 0.25468979295956434, - "grad_norm": 2.7443790435791016, - "learning_rate": 4.5755170117340596e-05, - "loss": 0.7547, - "step": 28810 - }, - { - "epoch": 0.2547781962198766, - "grad_norm": 5.969183444976807, - "learning_rate": 4.5753696729668724e-05, - "loss": 0.8911, - "step": 28820 - }, - { - "epoch": 0.2548665994801888, - "grad_norm": 5.39157772064209, - "learning_rate": 4.575222334199685e-05, - "loss": 0.8314, - "step": 28830 - }, - { - "epoch": 0.2549550027405011, - "grad_norm": 5.395505428314209, - "learning_rate": 4.575074995432499e-05, - "loss": 0.8861, - "step": 28840 - }, - { - "epoch": 0.2550434060008133, - "grad_norm": 12.572649002075195, - "learning_rate": 4.5749276566653116e-05, - "loss": 0.8176, - "step": 28850 - }, - { - "epoch": 0.25513180926112555, - "grad_norm": 3.854978561401367, - "learning_rate": 4.5747803178981244e-05, - "loss": 0.6201, - "step": 28860 - }, - { - "epoch": 0.2552202125214378, - "grad_norm": 3.417965888977051, - "learning_rate": 4.574632979130937e-05, - "loss": 0.6962, - "step": 28870 - }, - { - "epoch": 0.25530861578175, - "grad_norm": 4.227417945861816, - "learning_rate": 4.57448564036375e-05, - "loss": 0.7025, - "step": 28880 - }, - { - "epoch": 0.25539701904206225, - "grad_norm": 6.945284366607666, - "learning_rate": 4.574338301596563e-05, - "loss": 0.6971, - "step": 28890 - }, - { - "epoch": 0.25548542230237453, - "grad_norm": 6.564403533935547, - "learning_rate": 4.5741909628293764e-05, - "loss": 0.836, - "step": 28900 - }, - { - "epoch": 0.25557382556268676, - "grad_norm": 4.663499355316162, - "learning_rate": 4.574043624062189e-05, - "loss": 0.8684, - "step": 28910 - }, - { - "epoch": 0.255662228822999, - "grad_norm": 4.455374240875244, - "learning_rate": 4.573896285295002e-05, - "loss": 0.8096, - "step": 28920 - }, - { - "epoch": 0.2557506320833112, - "grad_norm": 2.70426869392395, - "learning_rate": 4.573748946527815e-05, - "loss": 0.6601, - "step": 28930 - }, - { - "epoch": 0.25583903534362346, - "grad_norm": 2.442452907562256, - "learning_rate": 4.573601607760628e-05, - "loss": 0.7376, - "step": 28940 - }, - { - "epoch": 0.2559274386039357, - "grad_norm": 3.1411333084106445, - "learning_rate": 4.5734542689934406e-05, - "loss": 0.6878, - "step": 28950 - }, - { - "epoch": 0.256015841864248, - "grad_norm": 4.382972240447998, - "learning_rate": 4.573306930226254e-05, - "loss": 0.878, - "step": 28960 - }, - { - "epoch": 0.2561042451245602, - "grad_norm": 3.5169060230255127, - "learning_rate": 4.573159591459066e-05, - "loss": 0.7822, - "step": 28970 - }, - { - "epoch": 0.25619264838487243, - "grad_norm": 9.13555908203125, - "learning_rate": 4.57301225269188e-05, - "loss": 0.7743, - "step": 28980 - }, - { - "epoch": 0.25628105164518467, - "grad_norm": 5.3839898109436035, - "learning_rate": 4.5728649139246926e-05, - "loss": 0.7427, - "step": 28990 - }, - { - "epoch": 0.2563694549054969, - "grad_norm": 3.092209577560425, - "learning_rate": 4.5727175751575055e-05, - "loss": 0.7135, - "step": 29000 - }, - { - "epoch": 0.2564578581658092, - "grad_norm": 7.3641486167907715, - "learning_rate": 4.572570236390318e-05, - "loss": 0.8581, - "step": 29010 - }, - { - "epoch": 0.2565462614261214, - "grad_norm": 4.167380332946777, - "learning_rate": 4.572422897623132e-05, - "loss": 0.7557, - "step": 29020 - }, - { - "epoch": 0.25663466468643364, - "grad_norm": 2.1778273582458496, - "learning_rate": 4.572275558855944e-05, - "loss": 0.846, - "step": 29030 - }, - { - "epoch": 0.2567230679467459, - "grad_norm": 8.991447448730469, - "learning_rate": 4.5721282200887575e-05, - "loss": 0.6931, - "step": 29040 - }, - { - "epoch": 0.2568114712070581, - "grad_norm": 1.7608896493911743, - "learning_rate": 4.5719808813215696e-05, - "loss": 0.8228, - "step": 29050 - }, - { - "epoch": 0.25689987446737034, - "grad_norm": 4.679460525512695, - "learning_rate": 4.571833542554383e-05, - "loss": 0.6683, - "step": 29060 - }, - { - "epoch": 0.2569882777276826, - "grad_norm": 11.739288330078125, - "learning_rate": 4.571686203787196e-05, - "loss": 0.7965, - "step": 29070 - }, - { - "epoch": 0.25707668098799485, - "grad_norm": 7.693981170654297, - "learning_rate": 4.571538865020009e-05, - "loss": 0.6544, - "step": 29080 - }, - { - "epoch": 0.2571650842483071, - "grad_norm": 4.8238301277160645, - "learning_rate": 4.5713915262528217e-05, - "loss": 0.7453, - "step": 29090 - }, - { - "epoch": 0.2572534875086193, - "grad_norm": 4.031973361968994, - "learning_rate": 4.571244187485635e-05, - "loss": 0.7022, - "step": 29100 - }, - { - "epoch": 0.25734189076893155, - "grad_norm": 13.379624366760254, - "learning_rate": 4.571096848718447e-05, - "loss": 0.7846, - "step": 29110 - }, - { - "epoch": 0.2574302940292438, - "grad_norm": 11.782503128051758, - "learning_rate": 4.570949509951261e-05, - "loss": 0.8556, - "step": 29120 - }, - { - "epoch": 0.25751869728955606, - "grad_norm": 13.237866401672363, - "learning_rate": 4.570802171184074e-05, - "loss": 0.8431, - "step": 29130 - }, - { - "epoch": 0.2576071005498683, - "grad_norm": 3.6365902423858643, - "learning_rate": 4.5706548324168865e-05, - "loss": 0.7829, - "step": 29140 - }, - { - "epoch": 0.2576955038101805, - "grad_norm": 3.789350748062134, - "learning_rate": 4.5705074936496993e-05, - "loss": 0.8309, - "step": 29150 - }, - { - "epoch": 0.25778390707049276, - "grad_norm": 3.392885208129883, - "learning_rate": 4.570360154882512e-05, - "loss": 0.7413, - "step": 29160 - }, - { - "epoch": 0.257872310330805, - "grad_norm": 4.8836750984191895, - "learning_rate": 4.570212816115325e-05, - "loss": 0.7744, - "step": 29170 - }, - { - "epoch": 0.2579607135911172, - "grad_norm": 2.781611204147339, - "learning_rate": 4.5700654773481385e-05, - "loss": 0.7637, - "step": 29180 - }, - { - "epoch": 0.2580491168514295, - "grad_norm": 6.791811943054199, - "learning_rate": 4.569918138580951e-05, - "loss": 0.6883, - "step": 29190 - }, - { - "epoch": 0.25813752011174174, - "grad_norm": 4.718374729156494, - "learning_rate": 4.569770799813764e-05, - "loss": 0.7378, - "step": 29200 - }, - { - "epoch": 0.25822592337205397, - "grad_norm": 3.223762273788452, - "learning_rate": 4.569623461046577e-05, - "loss": 0.8206, - "step": 29210 - }, - { - "epoch": 0.2583143266323662, - "grad_norm": 4.6873650550842285, - "learning_rate": 4.56947612227939e-05, - "loss": 0.8102, - "step": 29220 - }, - { - "epoch": 0.25840272989267843, - "grad_norm": 7.738824367523193, - "learning_rate": 4.569328783512203e-05, - "loss": 0.8461, - "step": 29230 - }, - { - "epoch": 0.25849113315299066, - "grad_norm": 7.177900314331055, - "learning_rate": 4.569181444745016e-05, - "loss": 0.6179, - "step": 29240 - }, - { - "epoch": 0.25857953641330295, - "grad_norm": 4.860195159912109, - "learning_rate": 4.5690341059778284e-05, - "loss": 0.7505, - "step": 29250 - }, - { - "epoch": 0.2586679396736152, - "grad_norm": 9.726078033447266, - "learning_rate": 4.568886767210642e-05, - "loss": 0.6895, - "step": 29260 - }, - { - "epoch": 0.2587563429339274, - "grad_norm": 9.46513557434082, - "learning_rate": 4.568739428443455e-05, - "loss": 0.8684, - "step": 29270 - }, - { - "epoch": 0.25884474619423964, - "grad_norm": 1.5469259023666382, - "learning_rate": 4.5685920896762676e-05, - "loss": 0.7257, - "step": 29280 - }, - { - "epoch": 0.25893314945455187, - "grad_norm": 5.646541595458984, - "learning_rate": 4.5684447509090804e-05, - "loss": 0.7053, - "step": 29290 - }, - { - "epoch": 0.2590215527148641, - "grad_norm": 4.826288223266602, - "learning_rate": 4.568297412141893e-05, - "loss": 0.7533, - "step": 29300 - }, - { - "epoch": 0.2591099559751764, - "grad_norm": 5.6624040603637695, - "learning_rate": 4.568150073374706e-05, - "loss": 0.6851, - "step": 29310 - }, - { - "epoch": 0.2591983592354886, - "grad_norm": 5.440062046051025, - "learning_rate": 4.5680027346075196e-05, - "loss": 0.8107, - "step": 29320 - }, - { - "epoch": 0.25928676249580085, - "grad_norm": 2.6743078231811523, - "learning_rate": 4.567855395840332e-05, - "loss": 0.6605, - "step": 29330 - }, - { - "epoch": 0.2593751657561131, - "grad_norm": 2.3574798107147217, - "learning_rate": 4.567708057073145e-05, - "loss": 0.721, - "step": 29340 - }, - { - "epoch": 0.2594635690164253, - "grad_norm": 3.5360846519470215, - "learning_rate": 4.567560718305958e-05, - "loss": 0.7052, - "step": 29350 - }, - { - "epoch": 0.25955197227673754, - "grad_norm": 2.8996875286102295, - "learning_rate": 4.567413379538771e-05, - "loss": 0.6752, - "step": 29360 - }, - { - "epoch": 0.25964037553704983, - "grad_norm": 8.65214729309082, - "learning_rate": 4.567266040771584e-05, - "loss": 0.7918, - "step": 29370 - }, - { - "epoch": 0.25972877879736206, - "grad_norm": 8.966941833496094, - "learning_rate": 4.567118702004397e-05, - "loss": 0.7335, - "step": 29380 - }, - { - "epoch": 0.2598171820576743, - "grad_norm": 2.327826976776123, - "learning_rate": 4.5669713632372094e-05, - "loss": 0.7333, - "step": 29390 - }, - { - "epoch": 0.2599055853179865, - "grad_norm": 8.276576042175293, - "learning_rate": 4.566824024470023e-05, - "loss": 0.8879, - "step": 29400 - }, - { - "epoch": 0.25999398857829875, - "grad_norm": 2.417679786682129, - "learning_rate": 4.566676685702835e-05, - "loss": 0.7675, - "step": 29410 - }, - { - "epoch": 0.260082391838611, - "grad_norm": 6.383553981781006, - "learning_rate": 4.5665293469356486e-05, - "loss": 0.7119, - "step": 29420 - }, - { - "epoch": 0.26017079509892327, - "grad_norm": 5.931835651397705, - "learning_rate": 4.5663820081684614e-05, - "loss": 0.8431, - "step": 29430 - }, - { - "epoch": 0.2602591983592355, - "grad_norm": 11.034318923950195, - "learning_rate": 4.566234669401274e-05, - "loss": 0.7607, - "step": 29440 - }, - { - "epoch": 0.26034760161954773, - "grad_norm": 4.3858513832092285, - "learning_rate": 4.566087330634087e-05, - "loss": 0.8781, - "step": 29450 - }, - { - "epoch": 0.26043600487985996, - "grad_norm": 3.791039228439331, - "learning_rate": 4.5659399918669006e-05, - "loss": 0.7861, - "step": 29460 - }, - { - "epoch": 0.2605244081401722, - "grad_norm": 1.4404274225234985, - "learning_rate": 4.565792653099713e-05, - "loss": 0.8871, - "step": 29470 - }, - { - "epoch": 0.2606128114004844, - "grad_norm": 3.2746682167053223, - "learning_rate": 4.565645314332526e-05, - "loss": 0.7398, - "step": 29480 - }, - { - "epoch": 0.2607012146607967, - "grad_norm": 3.9915900230407715, - "learning_rate": 4.565497975565339e-05, - "loss": 0.7003, - "step": 29490 - }, - { - "epoch": 0.26078961792110894, - "grad_norm": 6.544107913970947, - "learning_rate": 4.565350636798152e-05, - "loss": 0.8379, - "step": 29500 - }, - { - "epoch": 0.26087802118142117, - "grad_norm": 8.069681167602539, - "learning_rate": 4.565203298030965e-05, - "loss": 0.7272, - "step": 29510 - }, - { - "epoch": 0.2609664244417334, - "grad_norm": 5.080019474029541, - "learning_rate": 4.5650559592637776e-05, - "loss": 0.6892, - "step": 29520 - }, - { - "epoch": 0.26105482770204563, - "grad_norm": 4.58833122253418, - "learning_rate": 4.5649086204965905e-05, - "loss": 0.6827, - "step": 29530 - }, - { - "epoch": 0.26114323096235786, - "grad_norm": 2.668739080429077, - "learning_rate": 4.564761281729404e-05, - "loss": 0.7702, - "step": 29540 - }, - { - "epoch": 0.26123163422267015, - "grad_norm": 3.2595272064208984, - "learning_rate": 4.564613942962216e-05, - "loss": 0.7984, - "step": 29550 - }, - { - "epoch": 0.2613200374829824, - "grad_norm": 4.894524574279785, - "learning_rate": 4.56446660419503e-05, - "loss": 0.7828, - "step": 29560 - }, - { - "epoch": 0.2614084407432946, - "grad_norm": 7.50266170501709, - "learning_rate": 4.5643192654278425e-05, - "loss": 0.7309, - "step": 29570 - }, - { - "epoch": 0.26149684400360684, - "grad_norm": 10.72905158996582, - "learning_rate": 4.564171926660655e-05, - "loss": 0.8975, - "step": 29580 - }, - { - "epoch": 0.2615852472639191, - "grad_norm": 2.84784197807312, - "learning_rate": 4.564024587893468e-05, - "loss": 0.7273, - "step": 29590 - }, - { - "epoch": 0.26167365052423136, - "grad_norm": 4.118752479553223, - "learning_rate": 4.563877249126282e-05, - "loss": 0.7445, - "step": 29600 - }, - { - "epoch": 0.2617620537845436, - "grad_norm": 4.063949108123779, - "learning_rate": 4.563729910359094e-05, - "loss": 0.767, - "step": 29610 - }, - { - "epoch": 0.2618504570448558, - "grad_norm": 3.4891610145568848, - "learning_rate": 4.5635825715919074e-05, - "loss": 0.8095, - "step": 29620 - }, - { - "epoch": 0.26193886030516805, - "grad_norm": 3.2074899673461914, - "learning_rate": 4.56343523282472e-05, - "loss": 0.8034, - "step": 29630 - }, - { - "epoch": 0.2620272635654803, - "grad_norm": 3.521451950073242, - "learning_rate": 4.563287894057533e-05, - "loss": 0.7601, - "step": 29640 - }, - { - "epoch": 0.2621156668257925, - "grad_norm": 3.6601784229278564, - "learning_rate": 4.563140555290346e-05, - "loss": 0.8181, - "step": 29650 - }, - { - "epoch": 0.2622040700861048, - "grad_norm": 5.0407843589782715, - "learning_rate": 4.562993216523159e-05, - "loss": 0.6479, - "step": 29660 - }, - { - "epoch": 0.26229247334641703, - "grad_norm": 3.6762752532958984, - "learning_rate": 4.5628458777559715e-05, - "loss": 0.6781, - "step": 29670 - }, - { - "epoch": 0.26238087660672926, - "grad_norm": 4.478204727172852, - "learning_rate": 4.562698538988785e-05, - "loss": 0.7519, - "step": 29680 - }, - { - "epoch": 0.2624692798670415, - "grad_norm": 6.099710464477539, - "learning_rate": 4.562551200221598e-05, - "loss": 0.804, - "step": 29690 - }, - { - "epoch": 0.2625576831273537, - "grad_norm": 3.5862505435943604, - "learning_rate": 4.562403861454411e-05, - "loss": 0.8954, - "step": 29700 - }, - { - "epoch": 0.26264608638766596, - "grad_norm": 3.853513717651367, - "learning_rate": 4.5622565226872235e-05, - "loss": 0.7243, - "step": 29710 - }, - { - "epoch": 0.26273448964797824, - "grad_norm": 4.7881317138671875, - "learning_rate": 4.5621091839200364e-05, - "loss": 0.868, - "step": 29720 - }, - { - "epoch": 0.2628228929082905, - "grad_norm": 9.800519943237305, - "learning_rate": 4.561961845152849e-05, - "loss": 0.7831, - "step": 29730 - }, - { - "epoch": 0.2629112961686027, - "grad_norm": 2.545400857925415, - "learning_rate": 4.561814506385663e-05, - "loss": 0.7556, - "step": 29740 - }, - { - "epoch": 0.26299969942891493, - "grad_norm": 3.167473316192627, - "learning_rate": 4.5616671676184756e-05, - "loss": 0.7714, - "step": 29750 - }, - { - "epoch": 0.26308810268922717, - "grad_norm": 5.078524589538574, - "learning_rate": 4.5615198288512884e-05, - "loss": 0.8415, - "step": 29760 - }, - { - "epoch": 0.2631765059495394, - "grad_norm": 1.960549235343933, - "learning_rate": 4.561372490084101e-05, - "loss": 0.8475, - "step": 29770 - }, - { - "epoch": 0.2632649092098517, - "grad_norm": 5.510914325714111, - "learning_rate": 4.561225151316914e-05, - "loss": 0.8689, - "step": 29780 - }, - { - "epoch": 0.2633533124701639, - "grad_norm": 9.019115447998047, - "learning_rate": 4.561077812549727e-05, - "loss": 0.6989, - "step": 29790 - }, - { - "epoch": 0.26344171573047614, - "grad_norm": 1.9665924310684204, - "learning_rate": 4.56093047378254e-05, - "loss": 0.8381, - "step": 29800 - }, - { - "epoch": 0.2635301189907884, - "grad_norm": 4.336777687072754, - "learning_rate": 4.560783135015353e-05, - "loss": 0.7906, - "step": 29810 - }, - { - "epoch": 0.2636185222511006, - "grad_norm": 4.672365665435791, - "learning_rate": 4.560635796248166e-05, - "loss": 0.7862, - "step": 29820 - }, - { - "epoch": 0.26370692551141284, - "grad_norm": 5.014092445373535, - "learning_rate": 4.560488457480979e-05, - "loss": 0.8526, - "step": 29830 - }, - { - "epoch": 0.2637953287717251, - "grad_norm": 4.661694049835205, - "learning_rate": 4.560341118713792e-05, - "loss": 0.7181, - "step": 29840 - }, - { - "epoch": 0.26388373203203735, - "grad_norm": 1.9793035984039307, - "learning_rate": 4.5601937799466046e-05, - "loss": 0.6739, - "step": 29850 - }, - { - "epoch": 0.2639721352923496, - "grad_norm": 7.034848213195801, - "learning_rate": 4.5600464411794174e-05, - "loss": 0.8302, - "step": 29860 - }, - { - "epoch": 0.2640605385526618, - "grad_norm": 7.229719161987305, - "learning_rate": 4.559899102412231e-05, - "loss": 0.6392, - "step": 29870 - }, - { - "epoch": 0.26414894181297405, - "grad_norm": 12.791730880737305, - "learning_rate": 4.559751763645043e-05, - "loss": 0.6703, - "step": 29880 - }, - { - "epoch": 0.2642373450732863, - "grad_norm": 4.993589878082275, - "learning_rate": 4.5596044248778566e-05, - "loss": 0.8488, - "step": 29890 - }, - { - "epoch": 0.26432574833359856, - "grad_norm": 2.2029237747192383, - "learning_rate": 4.5594570861106695e-05, - "loss": 0.7983, - "step": 29900 - }, - { - "epoch": 0.2644141515939108, - "grad_norm": 1.6999568939208984, - "learning_rate": 4.559309747343482e-05, - "loss": 0.7184, - "step": 29910 - }, - { - "epoch": 0.264502554854223, - "grad_norm": 6.056368350982666, - "learning_rate": 4.559162408576295e-05, - "loss": 0.6905, - "step": 29920 - }, - { - "epoch": 0.26459095811453526, - "grad_norm": 4.264249801635742, - "learning_rate": 4.5590150698091086e-05, - "loss": 0.7324, - "step": 29930 - }, - { - "epoch": 0.2646793613748475, - "grad_norm": 3.543482780456543, - "learning_rate": 4.558867731041921e-05, - "loss": 0.8234, - "step": 29940 - }, - { - "epoch": 0.2647677646351597, - "grad_norm": 3.5544373989105225, - "learning_rate": 4.558720392274734e-05, - "loss": 0.5627, - "step": 29950 - }, - { - "epoch": 0.264856167895472, - "grad_norm": 5.946412086486816, - "learning_rate": 4.558573053507547e-05, - "loss": 0.7565, - "step": 29960 - }, - { - "epoch": 0.26494457115578424, - "grad_norm": 6.860233783721924, - "learning_rate": 4.55842571474036e-05, - "loss": 0.7988, - "step": 29970 - }, - { - "epoch": 0.26503297441609647, - "grad_norm": 5.926501274108887, - "learning_rate": 4.558278375973173e-05, - "loss": 0.8694, - "step": 29980 - }, - { - "epoch": 0.2651213776764087, - "grad_norm": 2.451258659362793, - "learning_rate": 4.5581310372059856e-05, - "loss": 0.7034, - "step": 29990 - }, - { - "epoch": 0.26520978093672093, - "grad_norm": 4.44228458404541, - "learning_rate": 4.5579836984387985e-05, - "loss": 0.7657, - "step": 30000 - }, - { - "epoch": 0.26529818419703316, - "grad_norm": 6.117458343505859, - "learning_rate": 4.557836359671612e-05, - "loss": 0.7869, - "step": 30010 - }, - { - "epoch": 0.26538658745734545, - "grad_norm": 6.146592140197754, - "learning_rate": 4.557689020904424e-05, - "loss": 0.6836, - "step": 30020 - }, - { - "epoch": 0.2654749907176577, - "grad_norm": 2.639390468597412, - "learning_rate": 4.557541682137238e-05, - "loss": 0.7957, - "step": 30030 - }, - { - "epoch": 0.2655633939779699, - "grad_norm": 2.850609302520752, - "learning_rate": 4.5573943433700505e-05, - "loss": 0.7955, - "step": 30040 - }, - { - "epoch": 0.26565179723828214, - "grad_norm": 2.463507890701294, - "learning_rate": 4.5572470046028633e-05, - "loss": 0.6879, - "step": 30050 - }, - { - "epoch": 0.26574020049859437, - "grad_norm": 2.183257579803467, - "learning_rate": 4.557099665835676e-05, - "loss": 0.6821, - "step": 30060 - }, - { - "epoch": 0.2658286037589066, - "grad_norm": 6.851439476013184, - "learning_rate": 4.55695232706849e-05, - "loss": 0.8008, - "step": 30070 - }, - { - "epoch": 0.2659170070192189, - "grad_norm": 0.9116033911705017, - "learning_rate": 4.556804988301302e-05, - "loss": 0.672, - "step": 30080 - }, - { - "epoch": 0.2660054102795311, - "grad_norm": 2.7948808670043945, - "learning_rate": 4.5566576495341154e-05, - "loss": 0.6747, - "step": 30090 - }, - { - "epoch": 0.26609381353984335, - "grad_norm": 5.142726898193359, - "learning_rate": 4.5565103107669275e-05, - "loss": 0.7999, - "step": 30100 - }, - { - "epoch": 0.2661822168001556, - "grad_norm": 2.4580373764038086, - "learning_rate": 4.556362971999741e-05, - "loss": 0.8119, - "step": 30110 - }, - { - "epoch": 0.2662706200604678, - "grad_norm": 8.661834716796875, - "learning_rate": 4.556215633232554e-05, - "loss": 0.8391, - "step": 30120 - }, - { - "epoch": 0.2663590233207801, - "grad_norm": 4.809054851531982, - "learning_rate": 4.556068294465367e-05, - "loss": 0.6991, - "step": 30130 - }, - { - "epoch": 0.26644742658109233, - "grad_norm": 5.437647342681885, - "learning_rate": 4.5559209556981795e-05, - "loss": 0.7262, - "step": 30140 - }, - { - "epoch": 0.26653582984140456, - "grad_norm": 2.0850820541381836, - "learning_rate": 4.555773616930993e-05, - "loss": 0.7492, - "step": 30150 - }, - { - "epoch": 0.2666242331017168, - "grad_norm": 4.553308010101318, - "learning_rate": 4.555626278163805e-05, - "loss": 0.6981, - "step": 30160 - }, - { - "epoch": 0.266712636362029, - "grad_norm": 2.7255380153656006, - "learning_rate": 4.555478939396619e-05, - "loss": 0.7656, - "step": 30170 - }, - { - "epoch": 0.26680103962234125, - "grad_norm": 18.0553035736084, - "learning_rate": 4.5553316006294316e-05, - "loss": 0.8432, - "step": 30180 - }, - { - "epoch": 0.26688944288265354, - "grad_norm": 3.5528764724731445, - "learning_rate": 4.5551842618622444e-05, - "loss": 0.9037, - "step": 30190 - }, - { - "epoch": 0.26697784614296577, - "grad_norm": 3.4594099521636963, - "learning_rate": 4.555036923095057e-05, - "loss": 0.6194, - "step": 30200 - }, - { - "epoch": 0.267066249403278, - "grad_norm": 4.578429698944092, - "learning_rate": 4.554889584327871e-05, - "loss": 0.7139, - "step": 30210 - }, - { - "epoch": 0.26715465266359023, - "grad_norm": 3.477224111557007, - "learning_rate": 4.554742245560683e-05, - "loss": 0.7869, - "step": 30220 - }, - { - "epoch": 0.26724305592390246, - "grad_norm": 2.6681270599365234, - "learning_rate": 4.5545949067934964e-05, - "loss": 0.8341, - "step": 30230 - }, - { - "epoch": 0.2673314591842147, - "grad_norm": 2.92987322807312, - "learning_rate": 4.5544475680263086e-05, - "loss": 0.8675, - "step": 30240 - }, - { - "epoch": 0.267419862444527, - "grad_norm": 4.547966957092285, - "learning_rate": 4.554300229259122e-05, - "loss": 0.6654, - "step": 30250 - }, - { - "epoch": 0.2675082657048392, - "grad_norm": 5.176558971405029, - "learning_rate": 4.554152890491935e-05, - "loss": 0.8412, - "step": 30260 - }, - { - "epoch": 0.26759666896515144, - "grad_norm": 8.517532348632812, - "learning_rate": 4.554005551724748e-05, - "loss": 0.7317, - "step": 30270 - }, - { - "epoch": 0.26768507222546367, - "grad_norm": 4.544113636016846, - "learning_rate": 4.5538582129575606e-05, - "loss": 0.6362, - "step": 30280 - }, - { - "epoch": 0.2677734754857759, - "grad_norm": 6.943235874176025, - "learning_rate": 4.553710874190374e-05, - "loss": 0.7622, - "step": 30290 - }, - { - "epoch": 0.26786187874608813, - "grad_norm": 5.016669273376465, - "learning_rate": 4.553563535423186e-05, - "loss": 0.6729, - "step": 30300 - }, - { - "epoch": 0.2679502820064004, - "grad_norm": 6.666787147521973, - "learning_rate": 4.553416196656e-05, - "loss": 0.7929, - "step": 30310 - }, - { - "epoch": 0.26803868526671265, - "grad_norm": 7.704743385314941, - "learning_rate": 4.5532688578888126e-05, - "loss": 0.7398, - "step": 30320 - }, - { - "epoch": 0.2681270885270249, - "grad_norm": 2.8809094429016113, - "learning_rate": 4.5531215191216254e-05, - "loss": 0.6199, - "step": 30330 - }, - { - "epoch": 0.2682154917873371, - "grad_norm": 5.0369367599487305, - "learning_rate": 4.552974180354438e-05, - "loss": 0.8144, - "step": 30340 - }, - { - "epoch": 0.26830389504764934, - "grad_norm": 5.744924068450928, - "learning_rate": 4.552826841587251e-05, - "loss": 0.8257, - "step": 30350 - }, - { - "epoch": 0.2683922983079616, - "grad_norm": 6.447247505187988, - "learning_rate": 4.552679502820064e-05, - "loss": 0.737, - "step": 30360 - }, - { - "epoch": 0.26848070156827386, - "grad_norm": 5.7486467361450195, - "learning_rate": 4.5525321640528775e-05, - "loss": 0.7288, - "step": 30370 - }, - { - "epoch": 0.2685691048285861, - "grad_norm": 2.520045518875122, - "learning_rate": 4.5523848252856896e-05, - "loss": 0.8139, - "step": 30380 - }, - { - "epoch": 0.2686575080888983, - "grad_norm": 2.754589796066284, - "learning_rate": 4.552237486518503e-05, - "loss": 0.7589, - "step": 30390 - }, - { - "epoch": 0.26874591134921055, - "grad_norm": 3.138747453689575, - "learning_rate": 4.552090147751316e-05, - "loss": 0.7891, - "step": 30400 - }, - { - "epoch": 0.2688343146095228, - "grad_norm": 2.6308279037475586, - "learning_rate": 4.551942808984129e-05, - "loss": 0.8124, - "step": 30410 - }, - { - "epoch": 0.268922717869835, - "grad_norm": 8.300191879272461, - "learning_rate": 4.5517954702169416e-05, - "loss": 0.7729, - "step": 30420 - }, - { - "epoch": 0.2690111211301473, - "grad_norm": 3.9517529010772705, - "learning_rate": 4.551648131449755e-05, - "loss": 0.802, - "step": 30430 - }, - { - "epoch": 0.26909952439045953, - "grad_norm": 4.681349754333496, - "learning_rate": 4.551500792682567e-05, - "loss": 0.6749, - "step": 30440 - }, - { - "epoch": 0.26918792765077176, - "grad_norm": 6.55589485168457, - "learning_rate": 4.551353453915381e-05, - "loss": 0.7256, - "step": 30450 - }, - { - "epoch": 0.269276330911084, - "grad_norm": 7.0311360359191895, - "learning_rate": 4.551206115148193e-05, - "loss": 0.7756, - "step": 30460 - }, - { - "epoch": 0.2693647341713962, - "grad_norm": 2.4952967166900635, - "learning_rate": 4.5510587763810065e-05, - "loss": 0.6642, - "step": 30470 - }, - { - "epoch": 0.26945313743170846, - "grad_norm": 7.936524391174316, - "learning_rate": 4.550911437613819e-05, - "loss": 0.7063, - "step": 30480 - }, - { - "epoch": 0.26954154069202074, - "grad_norm": 4.251359462738037, - "learning_rate": 4.550764098846632e-05, - "loss": 0.8426, - "step": 30490 - }, - { - "epoch": 0.269629943952333, - "grad_norm": 3.33892822265625, - "learning_rate": 4.550616760079445e-05, - "loss": 0.7088, - "step": 30500 - }, - { - "epoch": 0.2697183472126452, - "grad_norm": 7.408570766448975, - "learning_rate": 4.5504694213122585e-05, - "loss": 0.6597, - "step": 30510 - }, - { - "epoch": 0.26980675047295744, - "grad_norm": 5.651181221008301, - "learning_rate": 4.550322082545071e-05, - "loss": 0.8075, - "step": 30520 - }, - { - "epoch": 0.26989515373326967, - "grad_norm": 7.12681770324707, - "learning_rate": 4.550174743777884e-05, - "loss": 0.6747, - "step": 30530 - }, - { - "epoch": 0.2699835569935819, - "grad_norm": 6.387990474700928, - "learning_rate": 4.550027405010697e-05, - "loss": 0.7736, - "step": 30540 - }, - { - "epoch": 0.2700719602538942, - "grad_norm": 3.5151522159576416, - "learning_rate": 4.54988006624351e-05, - "loss": 0.9457, - "step": 30550 - }, - { - "epoch": 0.2701603635142064, - "grad_norm": 2.4693853855133057, - "learning_rate": 4.549732727476323e-05, - "loss": 0.6732, - "step": 30560 - }, - { - "epoch": 0.27024876677451865, - "grad_norm": 6.303086280822754, - "learning_rate": 4.549585388709136e-05, - "loss": 0.6541, - "step": 30570 - }, - { - "epoch": 0.2703371700348309, - "grad_norm": 4.603034973144531, - "learning_rate": 4.5494380499419484e-05, - "loss": 0.7209, - "step": 30580 - }, - { - "epoch": 0.2704255732951431, - "grad_norm": 3.458364486694336, - "learning_rate": 4.549290711174762e-05, - "loss": 0.7822, - "step": 30590 - }, - { - "epoch": 0.27051397655545534, - "grad_norm": 1.8052581548690796, - "learning_rate": 4.549143372407575e-05, - "loss": 0.7178, - "step": 30600 - }, - { - "epoch": 0.2706023798157676, - "grad_norm": 4.1473541259765625, - "learning_rate": 4.5489960336403875e-05, - "loss": 0.7764, - "step": 30610 - }, - { - "epoch": 0.27069078307607986, - "grad_norm": 7.287553310394287, - "learning_rate": 4.5488486948732004e-05, - "loss": 0.7904, - "step": 30620 - }, - { - "epoch": 0.2707791863363921, - "grad_norm": 10.020027160644531, - "learning_rate": 4.548701356106013e-05, - "loss": 0.8004, - "step": 30630 - }, - { - "epoch": 0.2708675895967043, - "grad_norm": 3.4412293434143066, - "learning_rate": 4.548554017338826e-05, - "loss": 0.6155, - "step": 30640 - }, - { - "epoch": 0.27095599285701655, - "grad_norm": 13.466927528381348, - "learning_rate": 4.5484066785716396e-05, - "loss": 0.7676, - "step": 30650 - }, - { - "epoch": 0.27104439611732883, - "grad_norm": 3.792027473449707, - "learning_rate": 4.5482593398044524e-05, - "loss": 0.7674, - "step": 30660 - }, - { - "epoch": 0.27113279937764106, - "grad_norm": 2.025473117828369, - "learning_rate": 4.548112001037265e-05, - "loss": 0.6348, - "step": 30670 - }, - { - "epoch": 0.2712212026379533, - "grad_norm": 3.9698288440704346, - "learning_rate": 4.547964662270078e-05, - "loss": 0.742, - "step": 30680 - }, - { - "epoch": 0.2713096058982655, - "grad_norm": 2.8065898418426514, - "learning_rate": 4.547817323502891e-05, - "loss": 0.7711, - "step": 30690 - }, - { - "epoch": 0.27139800915857776, - "grad_norm": 1.3832138776779175, - "learning_rate": 4.547669984735704e-05, - "loss": 0.6708, - "step": 30700 - }, - { - "epoch": 0.27148641241889, - "grad_norm": 4.950160980224609, - "learning_rate": 4.5475226459685166e-05, - "loss": 0.8564, - "step": 30710 - }, - { - "epoch": 0.2715748156792023, - "grad_norm": 5.886739730834961, - "learning_rate": 4.54737530720133e-05, - "loss": 0.7204, - "step": 30720 - }, - { - "epoch": 0.2716632189395145, - "grad_norm": 5.6793317794799805, - "learning_rate": 4.547227968434143e-05, - "loss": 0.6691, - "step": 30730 - }, - { - "epoch": 0.27175162219982674, - "grad_norm": 5.759725570678711, - "learning_rate": 4.547080629666956e-05, - "loss": 0.7842, - "step": 30740 - }, - { - "epoch": 0.27184002546013897, - "grad_norm": 6.173468112945557, - "learning_rate": 4.5469332908997686e-05, - "loss": 0.8394, - "step": 30750 - }, - { - "epoch": 0.2719284287204512, - "grad_norm": 4.296173572540283, - "learning_rate": 4.5467859521325814e-05, - "loss": 0.755, - "step": 30760 - }, - { - "epoch": 0.27201683198076343, - "grad_norm": 2.925757646560669, - "learning_rate": 4.546638613365394e-05, - "loss": 0.7721, - "step": 30770 - }, - { - "epoch": 0.2721052352410757, - "grad_norm": 3.2852859497070312, - "learning_rate": 4.546491274598208e-05, - "loss": 0.9166, - "step": 30780 - }, - { - "epoch": 0.27219363850138795, - "grad_norm": 4.882367134094238, - "learning_rate": 4.5463439358310206e-05, - "loss": 0.8579, - "step": 30790 - }, - { - "epoch": 0.2722820417617002, - "grad_norm": 3.473365306854248, - "learning_rate": 4.5461965970638334e-05, - "loss": 0.8085, - "step": 30800 - }, - { - "epoch": 0.2723704450220124, - "grad_norm": 4.500138282775879, - "learning_rate": 4.546049258296646e-05, - "loss": 0.7087, - "step": 30810 - }, - { - "epoch": 0.27245884828232464, - "grad_norm": 3.8217194080352783, - "learning_rate": 4.545901919529459e-05, - "loss": 0.6978, - "step": 30820 - }, - { - "epoch": 0.27254725154263687, - "grad_norm": 2.913564443588257, - "learning_rate": 4.545754580762272e-05, - "loss": 0.7134, - "step": 30830 - }, - { - "epoch": 0.27263565480294916, - "grad_norm": 5.970975399017334, - "learning_rate": 4.5456072419950855e-05, - "loss": 0.7294, - "step": 30840 - }, - { - "epoch": 0.2727240580632614, - "grad_norm": 4.244891166687012, - "learning_rate": 4.5454599032278976e-05, - "loss": 0.8509, - "step": 30850 - }, - { - "epoch": 0.2728124613235736, - "grad_norm": 3.983084201812744, - "learning_rate": 4.545312564460711e-05, - "loss": 0.8641, - "step": 30860 - }, - { - "epoch": 0.27290086458388585, - "grad_norm": 3.3314168453216553, - "learning_rate": 4.545165225693524e-05, - "loss": 0.7002, - "step": 30870 - }, - { - "epoch": 0.2729892678441981, - "grad_norm": 5.25112771987915, - "learning_rate": 4.545017886926337e-05, - "loss": 0.7936, - "step": 30880 - }, - { - "epoch": 0.2730776711045103, - "grad_norm": 1.5507129430770874, - "learning_rate": 4.5448705481591496e-05, - "loss": 0.6824, - "step": 30890 - }, - { - "epoch": 0.2731660743648226, - "grad_norm": 2.241774320602417, - "learning_rate": 4.544723209391963e-05, - "loss": 0.6614, - "step": 30900 - }, - { - "epoch": 0.27325447762513483, - "grad_norm": 9.836780548095703, - "learning_rate": 4.544575870624775e-05, - "loss": 0.8151, - "step": 30910 - }, - { - "epoch": 0.27334288088544706, - "grad_norm": 1.664473295211792, - "learning_rate": 4.544428531857589e-05, - "loss": 0.7083, - "step": 30920 - }, - { - "epoch": 0.2734312841457593, - "grad_norm": 7.396553993225098, - "learning_rate": 4.544281193090401e-05, - "loss": 0.7315, - "step": 30930 - }, - { - "epoch": 0.2735196874060715, - "grad_norm": 4.085721492767334, - "learning_rate": 4.5441338543232145e-05, - "loss": 0.7369, - "step": 30940 - }, - { - "epoch": 0.27360809066638375, - "grad_norm": 2.1590044498443604, - "learning_rate": 4.543986515556027e-05, - "loss": 0.7037, - "step": 30950 - }, - { - "epoch": 0.27369649392669604, - "grad_norm": 6.069953918457031, - "learning_rate": 4.54383917678884e-05, - "loss": 0.7966, - "step": 30960 - }, - { - "epoch": 0.27378489718700827, - "grad_norm": 7.211731433868408, - "learning_rate": 4.543691838021653e-05, - "loss": 0.7953, - "step": 30970 - }, - { - "epoch": 0.2738733004473205, - "grad_norm": 2.097848415374756, - "learning_rate": 4.5435444992544665e-05, - "loss": 0.6958, - "step": 30980 - }, - { - "epoch": 0.27396170370763273, - "grad_norm": 4.619161605834961, - "learning_rate": 4.543397160487279e-05, - "loss": 0.7856, - "step": 30990 - }, - { - "epoch": 0.27405010696794496, - "grad_norm": 6.556807994842529, - "learning_rate": 4.543249821720092e-05, - "loss": 0.6143, - "step": 31000 - }, - { - "epoch": 0.2741385102282572, - "grad_norm": 3.8834311962127686, - "learning_rate": 4.543102482952905e-05, - "loss": 0.7609, - "step": 31010 - }, - { - "epoch": 0.2742269134885695, - "grad_norm": 6.754446983337402, - "learning_rate": 4.542955144185718e-05, - "loss": 0.8063, - "step": 31020 - }, - { - "epoch": 0.2743153167488817, - "grad_norm": 6.257603645324707, - "learning_rate": 4.542807805418531e-05, - "loss": 0.7922, - "step": 31030 - }, - { - "epoch": 0.27440372000919394, - "grad_norm": 2.162466526031494, - "learning_rate": 4.542660466651344e-05, - "loss": 0.8371, - "step": 31040 - }, - { - "epoch": 0.27449212326950617, - "grad_norm": 3.549175977706909, - "learning_rate": 4.5425131278841564e-05, - "loss": 0.7575, - "step": 31050 - }, - { - "epoch": 0.2745805265298184, - "grad_norm": 6.57182502746582, - "learning_rate": 4.54236578911697e-05, - "loss": 0.7086, - "step": 31060 - }, - { - "epoch": 0.27466892979013063, - "grad_norm": 7.509003162384033, - "learning_rate": 4.542218450349782e-05, - "loss": 0.7143, - "step": 31070 - }, - { - "epoch": 0.2747573330504429, - "grad_norm": 4.139196395874023, - "learning_rate": 4.5420711115825955e-05, - "loss": 0.775, - "step": 31080 - }, - { - "epoch": 0.27484573631075515, - "grad_norm": 4.169070243835449, - "learning_rate": 4.5419237728154084e-05, - "loss": 0.7722, - "step": 31090 - }, - { - "epoch": 0.2749341395710674, - "grad_norm": 5.725850582122803, - "learning_rate": 4.541776434048221e-05, - "loss": 0.7125, - "step": 31100 - }, - { - "epoch": 0.2750225428313796, - "grad_norm": 7.626198768615723, - "learning_rate": 4.541629095281034e-05, - "loss": 0.8489, - "step": 31110 - }, - { - "epoch": 0.27511094609169184, - "grad_norm": 7.418731212615967, - "learning_rate": 4.5414817565138476e-05, - "loss": 0.8092, - "step": 31120 - }, - { - "epoch": 0.2751993493520041, - "grad_norm": 1.7932617664337158, - "learning_rate": 4.54133441774666e-05, - "loss": 0.8192, - "step": 31130 - }, - { - "epoch": 0.27528775261231636, - "grad_norm": 4.300790309906006, - "learning_rate": 4.541187078979473e-05, - "loss": 0.7661, - "step": 31140 - }, - { - "epoch": 0.2753761558726286, - "grad_norm": 5.326014518737793, - "learning_rate": 4.541039740212286e-05, - "loss": 0.6129, - "step": 31150 - }, - { - "epoch": 0.2754645591329408, - "grad_norm": 8.741189956665039, - "learning_rate": 4.540892401445099e-05, - "loss": 0.8287, - "step": 31160 - }, - { - "epoch": 0.27555296239325305, - "grad_norm": 9.096046447753906, - "learning_rate": 4.540745062677912e-05, - "loss": 0.8181, - "step": 31170 - }, - { - "epoch": 0.2756413656535653, - "grad_norm": 5.3577399253845215, - "learning_rate": 4.5405977239107246e-05, - "loss": 0.6864, - "step": 31180 - }, - { - "epoch": 0.27572976891387757, - "grad_norm": 2.724480628967285, - "learning_rate": 4.5404503851435374e-05, - "loss": 0.7777, - "step": 31190 - }, - { - "epoch": 0.2758181721741898, - "grad_norm": 3.459695816040039, - "learning_rate": 4.540303046376351e-05, - "loss": 0.7123, - "step": 31200 - }, - { - "epoch": 0.27590657543450203, - "grad_norm": 6.0657501220703125, - "learning_rate": 4.540155707609163e-05, - "loss": 0.6474, - "step": 31210 - }, - { - "epoch": 0.27599497869481426, - "grad_norm": 3.518301010131836, - "learning_rate": 4.5400083688419766e-05, - "loss": 0.6475, - "step": 31220 - }, - { - "epoch": 0.2760833819551265, - "grad_norm": 6.364334583282471, - "learning_rate": 4.5398610300747894e-05, - "loss": 0.6488, - "step": 31230 - }, - { - "epoch": 0.2761717852154387, - "grad_norm": 7.290544509887695, - "learning_rate": 4.539713691307602e-05, - "loss": 0.8546, - "step": 31240 - }, - { - "epoch": 0.276260188475751, - "grad_norm": 5.463634967803955, - "learning_rate": 4.539566352540415e-05, - "loss": 0.8186, - "step": 31250 - }, - { - "epoch": 0.27634859173606324, - "grad_norm": 7.422460079193115, - "learning_rate": 4.5394190137732286e-05, - "loss": 0.7488, - "step": 31260 - }, - { - "epoch": 0.2764369949963755, - "grad_norm": 4.348067760467529, - "learning_rate": 4.539271675006041e-05, - "loss": 0.7998, - "step": 31270 - }, - { - "epoch": 0.2765253982566877, - "grad_norm": 7.973194122314453, - "learning_rate": 4.539124336238854e-05, - "loss": 0.756, - "step": 31280 - }, - { - "epoch": 0.27661380151699994, - "grad_norm": 10.215353012084961, - "learning_rate": 4.5389769974716664e-05, - "loss": 0.6645, - "step": 31290 - }, - { - "epoch": 0.27670220477731217, - "grad_norm": 4.779819965362549, - "learning_rate": 4.53882965870448e-05, - "loss": 0.748, - "step": 31300 - }, - { - "epoch": 0.27679060803762445, - "grad_norm": 10.566243171691895, - "learning_rate": 4.538682319937293e-05, - "loss": 0.721, - "step": 31310 - }, - { - "epoch": 0.2768790112979367, - "grad_norm": 3.0014986991882324, - "learning_rate": 4.5385349811701056e-05, - "loss": 0.6974, - "step": 31320 - }, - { - "epoch": 0.2769674145582489, - "grad_norm": 3.077083110809326, - "learning_rate": 4.5383876424029185e-05, - "loss": 0.85, - "step": 31330 - }, - { - "epoch": 0.27705581781856115, - "grad_norm": 3.404559373855591, - "learning_rate": 4.538240303635732e-05, - "loss": 0.8457, - "step": 31340 - }, - { - "epoch": 0.2771442210788734, - "grad_norm": 5.274980068206787, - "learning_rate": 4.538092964868544e-05, - "loss": 0.7926, - "step": 31350 - }, - { - "epoch": 0.2772326243391856, - "grad_norm": 18.80033302307129, - "learning_rate": 4.5379456261013577e-05, - "loss": 0.7141, - "step": 31360 - }, - { - "epoch": 0.2773210275994979, - "grad_norm": 2.937209129333496, - "learning_rate": 4.5377982873341705e-05, - "loss": 0.7576, - "step": 31370 - }, - { - "epoch": 0.2774094308598101, - "grad_norm": 10.378564834594727, - "learning_rate": 4.537650948566983e-05, - "loss": 0.7622, - "step": 31380 - }, - { - "epoch": 0.27749783412012236, - "grad_norm": 6.353061676025391, - "learning_rate": 4.537503609799796e-05, - "loss": 0.826, - "step": 31390 - }, - { - "epoch": 0.2775862373804346, - "grad_norm": 8.064691543579102, - "learning_rate": 4.537356271032609e-05, - "loss": 0.7963, - "step": 31400 - }, - { - "epoch": 0.2776746406407468, - "grad_norm": 23.419443130493164, - "learning_rate": 4.537208932265422e-05, - "loss": 0.7, - "step": 31410 - }, - { - "epoch": 0.27776304390105905, - "grad_norm": 4.450402736663818, - "learning_rate": 4.5370615934982353e-05, - "loss": 0.6737, - "step": 31420 - }, - { - "epoch": 0.27785144716137133, - "grad_norm": 3.4850399494171143, - "learning_rate": 4.5369142547310475e-05, - "loss": 0.7534, - "step": 31430 - }, - { - "epoch": 0.27793985042168357, - "grad_norm": 2.655029535293579, - "learning_rate": 4.536766915963861e-05, - "loss": 0.7924, - "step": 31440 - }, - { - "epoch": 0.2780282536819958, - "grad_norm": 2.5935635566711426, - "learning_rate": 4.536619577196674e-05, - "loss": 0.7457, - "step": 31450 - }, - { - "epoch": 0.278116656942308, - "grad_norm": 2.8094661235809326, - "learning_rate": 4.536472238429487e-05, - "loss": 0.8115, - "step": 31460 - }, - { - "epoch": 0.27820506020262026, - "grad_norm": 5.6358256340026855, - "learning_rate": 4.5363248996622995e-05, - "loss": 0.7454, - "step": 31470 - }, - { - "epoch": 0.2782934634629325, - "grad_norm": 1.1668996810913086, - "learning_rate": 4.536177560895113e-05, - "loss": 0.6985, - "step": 31480 - }, - { - "epoch": 0.2783818667232448, - "grad_norm": 5.340427398681641, - "learning_rate": 4.536030222127925e-05, - "loss": 0.8203, - "step": 31490 - }, - { - "epoch": 0.278470269983557, - "grad_norm": 7.364120960235596, - "learning_rate": 4.535882883360739e-05, - "loss": 0.5594, - "step": 31500 - }, - { - "epoch": 0.27855867324386924, - "grad_norm": 2.81132435798645, - "learning_rate": 4.5357355445935515e-05, - "loss": 0.7554, - "step": 31510 - }, - { - "epoch": 0.27864707650418147, - "grad_norm": 7.576087951660156, - "learning_rate": 4.5355882058263644e-05, - "loss": 0.8755, - "step": 31520 - }, - { - "epoch": 0.2787354797644937, - "grad_norm": 6.171428203582764, - "learning_rate": 4.535440867059177e-05, - "loss": 0.8165, - "step": 31530 - }, - { - "epoch": 0.27882388302480593, - "grad_norm": 4.261667728424072, - "learning_rate": 4.53529352829199e-05, - "loss": 0.8848, - "step": 31540 - }, - { - "epoch": 0.2789122862851182, - "grad_norm": 1.7031608819961548, - "learning_rate": 4.535146189524803e-05, - "loss": 0.7545, - "step": 31550 - }, - { - "epoch": 0.27900068954543045, - "grad_norm": 3.160386562347412, - "learning_rate": 4.5349988507576164e-05, - "loss": 0.7302, - "step": 31560 - }, - { - "epoch": 0.2790890928057427, - "grad_norm": 3.2563092708587646, - "learning_rate": 4.534851511990429e-05, - "loss": 0.8251, - "step": 31570 - }, - { - "epoch": 0.2791774960660549, - "grad_norm": 2.8622827529907227, - "learning_rate": 4.534704173223242e-05, - "loss": 0.7475, - "step": 31580 - }, - { - "epoch": 0.27926589932636714, - "grad_norm": 2.567610740661621, - "learning_rate": 4.534556834456055e-05, - "loss": 0.6366, - "step": 31590 - }, - { - "epoch": 0.27935430258667937, - "grad_norm": 1.403955340385437, - "learning_rate": 4.534409495688868e-05, - "loss": 0.6678, - "step": 31600 - }, - { - "epoch": 0.27944270584699166, - "grad_norm": 3.9427952766418457, - "learning_rate": 4.5342621569216806e-05, - "loss": 0.7393, - "step": 31610 - }, - { - "epoch": 0.2795311091073039, - "grad_norm": 14.722577095031738, - "learning_rate": 4.534114818154494e-05, - "loss": 0.8377, - "step": 31620 - }, - { - "epoch": 0.2796195123676161, - "grad_norm": 5.493706703186035, - "learning_rate": 4.533967479387307e-05, - "loss": 0.8231, - "step": 31630 - }, - { - "epoch": 0.27970791562792835, - "grad_norm": 15.475202560424805, - "learning_rate": 4.53382014062012e-05, - "loss": 0.8847, - "step": 31640 - }, - { - "epoch": 0.2797963188882406, - "grad_norm": 3.6678528785705566, - "learning_rate": 4.5336728018529326e-05, - "loss": 0.7225, - "step": 31650 - }, - { - "epoch": 0.2798847221485528, - "grad_norm": 4.4838738441467285, - "learning_rate": 4.5335254630857454e-05, - "loss": 0.7591, - "step": 31660 - }, - { - "epoch": 0.2799731254088651, - "grad_norm": 2.967912435531616, - "learning_rate": 4.533378124318558e-05, - "loss": 0.8195, - "step": 31670 - }, - { - "epoch": 0.28006152866917733, - "grad_norm": 2.3313729763031006, - "learning_rate": 4.533230785551371e-05, - "loss": 0.8615, - "step": 31680 - }, - { - "epoch": 0.28014993192948956, - "grad_norm": 3.8316330909729004, - "learning_rate": 4.5330834467841846e-05, - "loss": 0.7331, - "step": 31690 - }, - { - "epoch": 0.2802383351898018, - "grad_norm": 7.374897480010986, - "learning_rate": 4.5329361080169974e-05, - "loss": 0.8383, - "step": 31700 - }, - { - "epoch": 0.280326738450114, - "grad_norm": 2.9984328746795654, - "learning_rate": 4.53278876924981e-05, - "loss": 0.8201, - "step": 31710 - }, - { - "epoch": 0.2804151417104263, - "grad_norm": 7.2484211921691895, - "learning_rate": 4.532641430482623e-05, - "loss": 0.8188, - "step": 31720 - }, - { - "epoch": 0.28050354497073854, - "grad_norm": 5.417243480682373, - "learning_rate": 4.532494091715436e-05, - "loss": 0.7514, - "step": 31730 - }, - { - "epoch": 0.28059194823105077, - "grad_norm": 2.9138054847717285, - "learning_rate": 4.532346752948249e-05, - "loss": 0.7529, - "step": 31740 - }, - { - "epoch": 0.280680351491363, - "grad_norm": 1.2528184652328491, - "learning_rate": 4.532199414181062e-05, - "loss": 0.7594, - "step": 31750 - }, - { - "epoch": 0.28076875475167523, - "grad_norm": 5.503701210021973, - "learning_rate": 4.5320520754138745e-05, - "loss": 0.8043, - "step": 31760 - }, - { - "epoch": 0.28085715801198746, - "grad_norm": 1.4960774183273315, - "learning_rate": 4.531904736646688e-05, - "loss": 0.6899, - "step": 31770 - }, - { - "epoch": 0.28094556127229975, - "grad_norm": 2.262402057647705, - "learning_rate": 4.531757397879501e-05, - "loss": 0.7645, - "step": 31780 - }, - { - "epoch": 0.281033964532612, - "grad_norm": 4.280818939208984, - "learning_rate": 4.5316100591123136e-05, - "loss": 0.6701, - "step": 31790 - }, - { - "epoch": 0.2811223677929242, - "grad_norm": 5.302253246307373, - "learning_rate": 4.5314627203451265e-05, - "loss": 0.8618, - "step": 31800 - }, - { - "epoch": 0.28121077105323644, - "grad_norm": 4.153197288513184, - "learning_rate": 4.53131538157794e-05, - "loss": 0.8364, - "step": 31810 - }, - { - "epoch": 0.28129917431354867, - "grad_norm": 4.214580535888672, - "learning_rate": 4.531168042810752e-05, - "loss": 0.8173, - "step": 31820 - }, - { - "epoch": 0.2813875775738609, - "grad_norm": 9.246479988098145, - "learning_rate": 4.5310207040435657e-05, - "loss": 0.7248, - "step": 31830 - }, - { - "epoch": 0.2814759808341732, - "grad_norm": 3.1720852851867676, - "learning_rate": 4.5308733652763785e-05, - "loss": 0.7114, - "step": 31840 - }, - { - "epoch": 0.2815643840944854, - "grad_norm": 2.7999463081359863, - "learning_rate": 4.530726026509191e-05, - "loss": 0.6936, - "step": 31850 - }, - { - "epoch": 0.28165278735479765, - "grad_norm": 1.7840044498443604, - "learning_rate": 4.530578687742004e-05, - "loss": 0.6884, - "step": 31860 - }, - { - "epoch": 0.2817411906151099, - "grad_norm": 2.3521809577941895, - "learning_rate": 4.530431348974817e-05, - "loss": 0.8267, - "step": 31870 - }, - { - "epoch": 0.2818295938754221, - "grad_norm": 2.475029230117798, - "learning_rate": 4.53028401020763e-05, - "loss": 0.7436, - "step": 31880 - }, - { - "epoch": 0.28191799713573434, - "grad_norm": 5.852056503295898, - "learning_rate": 4.5301366714404433e-05, - "loss": 0.7246, - "step": 31890 - }, - { - "epoch": 0.28200640039604663, - "grad_norm": 5.086641311645508, - "learning_rate": 4.5299893326732555e-05, - "loss": 0.7414, - "step": 31900 - }, - { - "epoch": 0.28209480365635886, - "grad_norm": 3.8775100708007812, - "learning_rate": 4.529841993906069e-05, - "loss": 0.7888, - "step": 31910 - }, - { - "epoch": 0.2821832069166711, - "grad_norm": 2.451167583465576, - "learning_rate": 4.529694655138882e-05, - "loss": 0.6424, - "step": 31920 - }, - { - "epoch": 0.2822716101769833, - "grad_norm": 7.1754913330078125, - "learning_rate": 4.529547316371695e-05, - "loss": 0.6673, - "step": 31930 - }, - { - "epoch": 0.28236001343729555, - "grad_norm": 1.0810681581497192, - "learning_rate": 4.5293999776045075e-05, - "loss": 0.7369, - "step": 31940 - }, - { - "epoch": 0.2824484166976078, - "grad_norm": 3.676067352294922, - "learning_rate": 4.529252638837321e-05, - "loss": 0.9427, - "step": 31950 - }, - { - "epoch": 0.28253681995792007, - "grad_norm": 11.599294662475586, - "learning_rate": 4.529105300070133e-05, - "loss": 0.8924, - "step": 31960 - }, - { - "epoch": 0.2826252232182323, - "grad_norm": 3.139760971069336, - "learning_rate": 4.528957961302947e-05, - "loss": 0.7779, - "step": 31970 - }, - { - "epoch": 0.28271362647854453, - "grad_norm": 5.7416486740112305, - "learning_rate": 4.5288106225357595e-05, - "loss": 0.7921, - "step": 31980 - }, - { - "epoch": 0.28280202973885676, - "grad_norm": 2.841639757156372, - "learning_rate": 4.5286632837685724e-05, - "loss": 0.6645, - "step": 31990 - }, - { - "epoch": 0.282890432999169, - "grad_norm": 2.6094791889190674, - "learning_rate": 4.528515945001385e-05, - "loss": 0.7191, - "step": 32000 - }, - { - "epoch": 0.2829788362594812, - "grad_norm": 9.99990177154541, - "learning_rate": 4.528368606234198e-05, - "loss": 0.7702, - "step": 32010 - }, - { - "epoch": 0.2830672395197935, - "grad_norm": 6.792905807495117, - "learning_rate": 4.528221267467011e-05, - "loss": 0.8994, - "step": 32020 - }, - { - "epoch": 0.28315564278010574, - "grad_norm": 4.174898624420166, - "learning_rate": 4.5280739286998244e-05, - "loss": 0.7427, - "step": 32030 - }, - { - "epoch": 0.283244046040418, - "grad_norm": 2.525766611099243, - "learning_rate": 4.5279265899326366e-05, - "loss": 0.7161, - "step": 32040 - }, - { - "epoch": 0.2833324493007302, - "grad_norm": 2.4678916931152344, - "learning_rate": 4.52777925116545e-05, - "loss": 0.7507, - "step": 32050 - }, - { - "epoch": 0.28342085256104244, - "grad_norm": 2.9384021759033203, - "learning_rate": 4.527631912398263e-05, - "loss": 0.8175, - "step": 32060 - }, - { - "epoch": 0.28350925582135467, - "grad_norm": 2.0960898399353027, - "learning_rate": 4.527484573631076e-05, - "loss": 0.814, - "step": 32070 - }, - { - "epoch": 0.28359765908166695, - "grad_norm": 1.9491114616394043, - "learning_rate": 4.5273372348638886e-05, - "loss": 0.7507, - "step": 32080 - }, - { - "epoch": 0.2836860623419792, - "grad_norm": 2.4328441619873047, - "learning_rate": 4.527189896096702e-05, - "loss": 0.7115, - "step": 32090 - }, - { - "epoch": 0.2837744656022914, - "grad_norm": 5.653671741485596, - "learning_rate": 4.527042557329514e-05, - "loss": 0.7711, - "step": 32100 - }, - { - "epoch": 0.28386286886260365, - "grad_norm": 5.98483943939209, - "learning_rate": 4.526895218562328e-05, - "loss": 0.7459, - "step": 32110 - }, - { - "epoch": 0.2839512721229159, - "grad_norm": 3.305734634399414, - "learning_rate": 4.52674787979514e-05, - "loss": 0.5808, - "step": 32120 - }, - { - "epoch": 0.2840396753832281, - "grad_norm": 3.777399778366089, - "learning_rate": 4.5266005410279534e-05, - "loss": 0.7134, - "step": 32130 - }, - { - "epoch": 0.2841280786435404, - "grad_norm": 4.952066421508789, - "learning_rate": 4.526453202260766e-05, - "loss": 0.7596, - "step": 32140 - }, - { - "epoch": 0.2842164819038526, - "grad_norm": 7.45338773727417, - "learning_rate": 4.526305863493579e-05, - "loss": 0.7016, - "step": 32150 - }, - { - "epoch": 0.28430488516416486, - "grad_norm": 4.668613910675049, - "learning_rate": 4.526158524726392e-05, - "loss": 0.7366, - "step": 32160 - }, - { - "epoch": 0.2843932884244771, - "grad_norm": 3.1431198120117188, - "learning_rate": 4.5260111859592054e-05, - "loss": 0.6675, - "step": 32170 - }, - { - "epoch": 0.2844816916847893, - "grad_norm": 5.466425895690918, - "learning_rate": 4.5258638471920176e-05, - "loss": 0.8678, - "step": 32180 - }, - { - "epoch": 0.28457009494510155, - "grad_norm": 8.157071113586426, - "learning_rate": 4.525716508424831e-05, - "loss": 0.8261, - "step": 32190 - }, - { - "epoch": 0.28465849820541383, - "grad_norm": 2.376061201095581, - "learning_rate": 4.525569169657644e-05, - "loss": 0.7547, - "step": 32200 - }, - { - "epoch": 0.28474690146572607, - "grad_norm": 3.0361568927764893, - "learning_rate": 4.525421830890457e-05, - "loss": 0.8647, - "step": 32210 - }, - { - "epoch": 0.2848353047260383, - "grad_norm": 4.107792377471924, - "learning_rate": 4.5252744921232696e-05, - "loss": 0.77, - "step": 32220 - }, - { - "epoch": 0.2849237079863505, - "grad_norm": 4.00471830368042, - "learning_rate": 4.5251271533560825e-05, - "loss": 0.7364, - "step": 32230 - }, - { - "epoch": 0.28501211124666276, - "grad_norm": 2.015693426132202, - "learning_rate": 4.524979814588895e-05, - "loss": 0.6751, - "step": 32240 - }, - { - "epoch": 0.28510051450697504, - "grad_norm": 3.4149277210235596, - "learning_rate": 4.524832475821709e-05, - "loss": 0.6419, - "step": 32250 - }, - { - "epoch": 0.2851889177672873, - "grad_norm": 2.5606470108032227, - "learning_rate": 4.524685137054521e-05, - "loss": 0.6333, - "step": 32260 - }, - { - "epoch": 0.2852773210275995, - "grad_norm": 6.284425258636475, - "learning_rate": 4.5245377982873345e-05, - "loss": 0.7133, - "step": 32270 - }, - { - "epoch": 0.28536572428791174, - "grad_norm": 3.353984832763672, - "learning_rate": 4.524390459520147e-05, - "loss": 0.7698, - "step": 32280 - }, - { - "epoch": 0.28545412754822397, - "grad_norm": 5.407648086547852, - "learning_rate": 4.52424312075296e-05, - "loss": 0.8678, - "step": 32290 - }, - { - "epoch": 0.2855425308085362, - "grad_norm": 6.983589172363281, - "learning_rate": 4.524095781985773e-05, - "loss": 0.7221, - "step": 32300 - }, - { - "epoch": 0.2856309340688485, - "grad_norm": 2.404683828353882, - "learning_rate": 4.5239484432185865e-05, - "loss": 0.8152, - "step": 32310 - }, - { - "epoch": 0.2857193373291607, - "grad_norm": 4.022778511047363, - "learning_rate": 4.5238011044513987e-05, - "loss": 0.7457, - "step": 32320 - }, - { - "epoch": 0.28580774058947295, - "grad_norm": 3.6083014011383057, - "learning_rate": 4.523653765684212e-05, - "loss": 0.6555, - "step": 32330 - }, - { - "epoch": 0.2858961438497852, - "grad_norm": 3.905674695968628, - "learning_rate": 4.523506426917024e-05, - "loss": 0.7194, - "step": 32340 - }, - { - "epoch": 0.2859845471100974, - "grad_norm": 3.084463596343994, - "learning_rate": 4.523359088149838e-05, - "loss": 0.9105, - "step": 32350 - }, - { - "epoch": 0.28607295037040964, - "grad_norm": 4.024795055389404, - "learning_rate": 4.523211749382651e-05, - "loss": 0.66, - "step": 32360 - }, - { - "epoch": 0.2861613536307219, - "grad_norm": 3.0946974754333496, - "learning_rate": 4.5230644106154635e-05, - "loss": 0.6783, - "step": 32370 - }, - { - "epoch": 0.28624975689103416, - "grad_norm": 9.112564086914062, - "learning_rate": 4.5229170718482763e-05, - "loss": 0.7838, - "step": 32380 - }, - { - "epoch": 0.2863381601513464, - "grad_norm": 8.240164756774902, - "learning_rate": 4.52276973308109e-05, - "loss": 0.7674, - "step": 32390 - }, - { - "epoch": 0.2864265634116586, - "grad_norm": 5.458837985992432, - "learning_rate": 4.522622394313902e-05, - "loss": 0.7803, - "step": 32400 - }, - { - "epoch": 0.28651496667197085, - "grad_norm": 2.699753999710083, - "learning_rate": 4.5224750555467155e-05, - "loss": 0.775, - "step": 32410 - }, - { - "epoch": 0.2866033699322831, - "grad_norm": 3.461235523223877, - "learning_rate": 4.5223277167795284e-05, - "loss": 0.6948, - "step": 32420 - }, - { - "epoch": 0.28669177319259537, - "grad_norm": 3.9574456214904785, - "learning_rate": 4.522180378012341e-05, - "loss": 0.7613, - "step": 32430 - }, - { - "epoch": 0.2867801764529076, - "grad_norm": 1.863532304763794, - "learning_rate": 4.522033039245154e-05, - "loss": 0.6545, - "step": 32440 - }, - { - "epoch": 0.28686857971321983, - "grad_norm": 2.3592708110809326, - "learning_rate": 4.5218857004779676e-05, - "loss": 0.802, - "step": 32450 - }, - { - "epoch": 0.28695698297353206, - "grad_norm": 5.686901092529297, - "learning_rate": 4.52173836171078e-05, - "loss": 0.747, - "step": 32460 - }, - { - "epoch": 0.2870453862338443, - "grad_norm": 6.838834762573242, - "learning_rate": 4.521591022943593e-05, - "loss": 0.6799, - "step": 32470 - }, - { - "epoch": 0.2871337894941565, - "grad_norm": 6.67786169052124, - "learning_rate": 4.521443684176406e-05, - "loss": 0.6916, - "step": 32480 - }, - { - "epoch": 0.2872221927544688, - "grad_norm": 3.9122962951660156, - "learning_rate": 4.521296345409219e-05, - "loss": 0.7445, - "step": 32490 - }, - { - "epoch": 0.28731059601478104, - "grad_norm": 2.963627576828003, - "learning_rate": 4.521149006642032e-05, - "loss": 0.747, - "step": 32500 - }, - { - "epoch": 0.28739899927509327, - "grad_norm": 5.150475978851318, - "learning_rate": 4.5210016678748446e-05, - "loss": 0.8416, - "step": 32510 - }, - { - "epoch": 0.2874874025354055, - "grad_norm": 4.771765232086182, - "learning_rate": 4.5208543291076574e-05, - "loss": 0.6932, - "step": 32520 - }, - { - "epoch": 0.28757580579571773, - "grad_norm": 6.81682825088501, - "learning_rate": 4.520706990340471e-05, - "loss": 0.7802, - "step": 32530 - }, - { - "epoch": 0.28766420905602996, - "grad_norm": 4.43913459777832, - "learning_rate": 4.520559651573284e-05, - "loss": 0.8444, - "step": 32540 - }, - { - "epoch": 0.28775261231634225, - "grad_norm": 4.907598972320557, - "learning_rate": 4.5204123128060966e-05, - "loss": 0.8626, - "step": 32550 - }, - { - "epoch": 0.2878410155766545, - "grad_norm": 2.7618095874786377, - "learning_rate": 4.5202649740389094e-05, - "loss": 0.6698, - "step": 32560 - }, - { - "epoch": 0.2879294188369667, - "grad_norm": 1.5626672506332397, - "learning_rate": 4.520117635271722e-05, - "loss": 0.6031, - "step": 32570 - }, - { - "epoch": 0.28801782209727894, - "grad_norm": 4.612063407897949, - "learning_rate": 4.519970296504535e-05, - "loss": 0.6114, - "step": 32580 - }, - { - "epoch": 0.2881062253575912, - "grad_norm": 2.4098236560821533, - "learning_rate": 4.519822957737348e-05, - "loss": 0.7503, - "step": 32590 - }, - { - "epoch": 0.2881946286179034, - "grad_norm": 4.4840850830078125, - "learning_rate": 4.5196756189701614e-05, - "loss": 0.753, - "step": 32600 - }, - { - "epoch": 0.2882830318782157, - "grad_norm": 4.803959369659424, - "learning_rate": 4.519528280202974e-05, - "loss": 0.6325, - "step": 32610 - }, - { - "epoch": 0.2883714351385279, - "grad_norm": 2.624789237976074, - "learning_rate": 4.519380941435787e-05, - "loss": 0.6635, - "step": 32620 - }, - { - "epoch": 0.28845983839884015, - "grad_norm": 4.030854225158691, - "learning_rate": 4.5192336026686e-05, - "loss": 0.7889, - "step": 32630 - }, - { - "epoch": 0.2885482416591524, - "grad_norm": 6.312045097351074, - "learning_rate": 4.519086263901413e-05, - "loss": 0.866, - "step": 32640 - }, - { - "epoch": 0.2886366449194646, - "grad_norm": 4.309917449951172, - "learning_rate": 4.5189389251342256e-05, - "loss": 0.8799, - "step": 32650 - }, - { - "epoch": 0.28872504817977684, - "grad_norm": 3.6183743476867676, - "learning_rate": 4.518791586367039e-05, - "loss": 0.8994, - "step": 32660 - }, - { - "epoch": 0.28881345144008913, - "grad_norm": 5.355915069580078, - "learning_rate": 4.518644247599852e-05, - "loss": 0.7293, - "step": 32670 - }, - { - "epoch": 0.28890185470040136, - "grad_norm": 7.802880764007568, - "learning_rate": 4.518496908832665e-05, - "loss": 0.7388, - "step": 32680 - }, - { - "epoch": 0.2889902579607136, - "grad_norm": 4.839122772216797, - "learning_rate": 4.5183495700654776e-05, - "loss": 0.7913, - "step": 32690 - }, - { - "epoch": 0.2890786612210258, - "grad_norm": 7.241281509399414, - "learning_rate": 4.5182022312982905e-05, - "loss": 0.8092, - "step": 32700 - }, - { - "epoch": 0.28916706448133805, - "grad_norm": 6.2426838874816895, - "learning_rate": 4.518054892531103e-05, - "loss": 0.8079, - "step": 32710 - }, - { - "epoch": 0.2892554677416503, - "grad_norm": 7.339729309082031, - "learning_rate": 4.517907553763917e-05, - "loss": 0.8304, - "step": 32720 - }, - { - "epoch": 0.28934387100196257, - "grad_norm": 3.2782113552093506, - "learning_rate": 4.517760214996729e-05, - "loss": 0.7398, - "step": 32730 - }, - { - "epoch": 0.2894322742622748, - "grad_norm": 6.81158447265625, - "learning_rate": 4.5176128762295425e-05, - "loss": 0.7231, - "step": 32740 - }, - { - "epoch": 0.28952067752258703, - "grad_norm": 4.297384262084961, - "learning_rate": 4.517465537462355e-05, - "loss": 0.8208, - "step": 32750 - }, - { - "epoch": 0.28960908078289926, - "grad_norm": 1.8786605596542358, - "learning_rate": 4.517318198695168e-05, - "loss": 0.7891, - "step": 32760 - }, - { - "epoch": 0.2896974840432115, - "grad_norm": 4.043199062347412, - "learning_rate": 4.517170859927981e-05, - "loss": 0.6814, - "step": 32770 - }, - { - "epoch": 0.2897858873035238, - "grad_norm": 2.5615527629852295, - "learning_rate": 4.5170235211607945e-05, - "loss": 0.7769, - "step": 32780 - }, - { - "epoch": 0.289874290563836, - "grad_norm": 14.03801441192627, - "learning_rate": 4.516876182393607e-05, - "loss": 0.7883, - "step": 32790 - }, - { - "epoch": 0.28996269382414824, - "grad_norm": 4.790248394012451, - "learning_rate": 4.51672884362642e-05, - "loss": 0.6845, - "step": 32800 - }, - { - "epoch": 0.2900510970844605, - "grad_norm": 3.695822238922119, - "learning_rate": 4.516581504859232e-05, - "loss": 0.6317, - "step": 32810 - }, - { - "epoch": 0.2901395003447727, - "grad_norm": 3.9681992530822754, - "learning_rate": 4.516434166092046e-05, - "loss": 0.835, - "step": 32820 - }, - { - "epoch": 0.29022790360508494, - "grad_norm": 5.208821773529053, - "learning_rate": 4.516286827324859e-05, - "loss": 0.7193, - "step": 32830 - }, - { - "epoch": 0.2903163068653972, - "grad_norm": 2.9277303218841553, - "learning_rate": 4.5161394885576715e-05, - "loss": 0.6765, - "step": 32840 - }, - { - "epoch": 0.29040471012570945, - "grad_norm": 9.771438598632812, - "learning_rate": 4.5159921497904844e-05, - "loss": 0.7952, - "step": 32850 - }, - { - "epoch": 0.2904931133860217, - "grad_norm": 3.2962722778320312, - "learning_rate": 4.515844811023298e-05, - "loss": 0.7132, - "step": 32860 - }, - { - "epoch": 0.2905815166463339, - "grad_norm": 4.077047824859619, - "learning_rate": 4.51569747225611e-05, - "loss": 0.7213, - "step": 32870 - }, - { - "epoch": 0.29066991990664615, - "grad_norm": 5.25209379196167, - "learning_rate": 4.5155501334889235e-05, - "loss": 0.8393, - "step": 32880 - }, - { - "epoch": 0.2907583231669584, - "grad_norm": 3.1044015884399414, - "learning_rate": 4.5154027947217364e-05, - "loss": 0.8898, - "step": 32890 - }, - { - "epoch": 0.29084672642727066, - "grad_norm": 4.176365852355957, - "learning_rate": 4.515255455954549e-05, - "loss": 0.6721, - "step": 32900 - }, - { - "epoch": 0.2909351296875829, - "grad_norm": 5.453918933868408, - "learning_rate": 4.515108117187362e-05, - "loss": 0.8482, - "step": 32910 - }, - { - "epoch": 0.2910235329478951, - "grad_norm": 4.604106903076172, - "learning_rate": 4.5149607784201756e-05, - "loss": 0.7786, - "step": 32920 - }, - { - "epoch": 0.29111193620820736, - "grad_norm": 6.950430393218994, - "learning_rate": 4.514813439652988e-05, - "loss": 0.7997, - "step": 32930 - }, - { - "epoch": 0.2912003394685196, - "grad_norm": 2.0689749717712402, - "learning_rate": 4.514666100885801e-05, - "loss": 0.756, - "step": 32940 - }, - { - "epoch": 0.2912887427288318, - "grad_norm": 3.6594607830047607, - "learning_rate": 4.5145187621186134e-05, - "loss": 0.7576, - "step": 32950 - }, - { - "epoch": 0.2913771459891441, - "grad_norm": 3.749756097793579, - "learning_rate": 4.514371423351427e-05, - "loss": 0.667, - "step": 32960 - }, - { - "epoch": 0.29146554924945633, - "grad_norm": 2.4697751998901367, - "learning_rate": 4.51422408458424e-05, - "loss": 0.945, - "step": 32970 - }, - { - "epoch": 0.29155395250976857, - "grad_norm": 9.962291717529297, - "learning_rate": 4.5140767458170526e-05, - "loss": 0.7015, - "step": 32980 - }, - { - "epoch": 0.2916423557700808, - "grad_norm": 10.507822036743164, - "learning_rate": 4.5139294070498654e-05, - "loss": 0.6948, - "step": 32990 - }, - { - "epoch": 0.291730759030393, - "grad_norm": 5.705935001373291, - "learning_rate": 4.513782068282679e-05, - "loss": 0.6677, - "step": 33000 - }, - { - "epoch": 0.29181916229070526, - "grad_norm": 2.1423537731170654, - "learning_rate": 4.513634729515491e-05, - "loss": 0.6951, - "step": 33010 - }, - { - "epoch": 0.29190756555101754, - "grad_norm": 5.549307823181152, - "learning_rate": 4.5134873907483046e-05, - "loss": 0.7208, - "step": 33020 - }, - { - "epoch": 0.2919959688113298, - "grad_norm": 3.8369317054748535, - "learning_rate": 4.5133400519811174e-05, - "loss": 0.7848, - "step": 33030 - }, - { - "epoch": 0.292084372071642, - "grad_norm": 5.902238368988037, - "learning_rate": 4.51319271321393e-05, - "loss": 0.8579, - "step": 33040 - }, - { - "epoch": 0.29217277533195424, - "grad_norm": 2.6832330226898193, - "learning_rate": 4.513045374446743e-05, - "loss": 0.738, - "step": 33050 - }, - { - "epoch": 0.29226117859226647, - "grad_norm": 5.537685394287109, - "learning_rate": 4.512898035679556e-05, - "loss": 0.7045, - "step": 33060 - }, - { - "epoch": 0.2923495818525787, - "grad_norm": 4.328697681427002, - "learning_rate": 4.512750696912369e-05, - "loss": 0.8025, - "step": 33070 - }, - { - "epoch": 0.292437985112891, - "grad_norm": 1.3331048488616943, - "learning_rate": 4.512603358145182e-05, - "loss": 0.7723, - "step": 33080 - }, - { - "epoch": 0.2925263883732032, - "grad_norm": 5.987185001373291, - "learning_rate": 4.5124560193779944e-05, - "loss": 0.7358, - "step": 33090 - }, - { - "epoch": 0.29261479163351545, - "grad_norm": 3.3136699199676514, - "learning_rate": 4.512308680610808e-05, - "loss": 0.7887, - "step": 33100 - }, - { - "epoch": 0.2927031948938277, - "grad_norm": 2.6293253898620605, - "learning_rate": 4.512161341843621e-05, - "loss": 0.7684, - "step": 33110 - }, - { - "epoch": 0.2927915981541399, - "grad_norm": 3.0009572505950928, - "learning_rate": 4.5120140030764336e-05, - "loss": 0.7661, - "step": 33120 - }, - { - "epoch": 0.29288000141445214, - "grad_norm": 3.628920555114746, - "learning_rate": 4.5118666643092465e-05, - "loss": 0.6792, - "step": 33130 - }, - { - "epoch": 0.2929684046747644, - "grad_norm": 5.839208602905273, - "learning_rate": 4.51171932554206e-05, - "loss": 0.6378, - "step": 33140 - }, - { - "epoch": 0.29305680793507666, - "grad_norm": 8.483647346496582, - "learning_rate": 4.511571986774872e-05, - "loss": 0.6737, - "step": 33150 - }, - { - "epoch": 0.2931452111953889, - "grad_norm": 6.439718723297119, - "learning_rate": 4.5114246480076856e-05, - "loss": 0.7123, - "step": 33160 - }, - { - "epoch": 0.2932336144557011, - "grad_norm": 3.1471869945526123, - "learning_rate": 4.511277309240498e-05, - "loss": 0.8745, - "step": 33170 - }, - { - "epoch": 0.29332201771601335, - "grad_norm": 2.6022372245788574, - "learning_rate": 4.511129970473311e-05, - "loss": 0.7852, - "step": 33180 - }, - { - "epoch": 0.2934104209763256, - "grad_norm": 7.2737555503845215, - "learning_rate": 4.510982631706124e-05, - "loss": 0.7135, - "step": 33190 - }, - { - "epoch": 0.29349882423663787, - "grad_norm": 2.462229013442993, - "learning_rate": 4.510835292938937e-05, - "loss": 0.6438, - "step": 33200 - }, - { - "epoch": 0.2935872274969501, - "grad_norm": 7.040503025054932, - "learning_rate": 4.51068795417175e-05, - "loss": 0.6436, - "step": 33210 - }, - { - "epoch": 0.29367563075726233, - "grad_norm": 4.6756110191345215, - "learning_rate": 4.510540615404563e-05, - "loss": 0.7018, - "step": 33220 - }, - { - "epoch": 0.29376403401757456, - "grad_norm": 18.167781829833984, - "learning_rate": 4.5103932766373755e-05, - "loss": 0.7519, - "step": 33230 - }, - { - "epoch": 0.2938524372778868, - "grad_norm": 4.194391250610352, - "learning_rate": 4.510245937870189e-05, - "loss": 0.7561, - "step": 33240 - }, - { - "epoch": 0.293940840538199, - "grad_norm": 16.583396911621094, - "learning_rate": 4.510098599103002e-05, - "loss": 0.6939, - "step": 33250 - }, - { - "epoch": 0.2940292437985113, - "grad_norm": 3.5381343364715576, - "learning_rate": 4.509951260335815e-05, - "loss": 0.8336, - "step": 33260 - }, - { - "epoch": 0.29411764705882354, - "grad_norm": 2.9034624099731445, - "learning_rate": 4.5098039215686275e-05, - "loss": 0.6719, - "step": 33270 - }, - { - "epoch": 0.29420605031913577, - "grad_norm": 7.28995418548584, - "learning_rate": 4.509656582801441e-05, - "loss": 0.7327, - "step": 33280 - }, - { - "epoch": 0.294294453579448, - "grad_norm": 7.901029586791992, - "learning_rate": 4.509509244034253e-05, - "loss": 0.9311, - "step": 33290 - }, - { - "epoch": 0.29438285683976023, - "grad_norm": 2.0050694942474365, - "learning_rate": 4.509361905267067e-05, - "loss": 0.6719, - "step": 33300 - }, - { - "epoch": 0.2944712601000725, - "grad_norm": 1.942972183227539, - "learning_rate": 4.509214566499879e-05, - "loss": 0.7469, - "step": 33310 - }, - { - "epoch": 0.29455966336038475, - "grad_norm": 10.133966445922852, - "learning_rate": 4.5090672277326924e-05, - "loss": 0.7659, - "step": 33320 - }, - { - "epoch": 0.294648066620697, - "grad_norm": 8.564141273498535, - "learning_rate": 4.508919888965505e-05, - "loss": 0.6516, - "step": 33330 - }, - { - "epoch": 0.2947364698810092, - "grad_norm": 7.872227668762207, - "learning_rate": 4.508772550198318e-05, - "loss": 0.7498, - "step": 33340 - }, - { - "epoch": 0.29482487314132144, - "grad_norm": 3.1297037601470947, - "learning_rate": 4.508625211431131e-05, - "loss": 0.6493, - "step": 33350 - }, - { - "epoch": 0.2949132764016337, - "grad_norm": 2.6965694427490234, - "learning_rate": 4.5084778726639444e-05, - "loss": 0.8029, - "step": 33360 - }, - { - "epoch": 0.29500167966194596, - "grad_norm": 1.995500087738037, - "learning_rate": 4.5083305338967565e-05, - "loss": 0.7052, - "step": 33370 - }, - { - "epoch": 0.2950900829222582, - "grad_norm": 3.699645757675171, - "learning_rate": 4.50818319512957e-05, - "loss": 0.8108, - "step": 33380 - }, - { - "epoch": 0.2951784861825704, - "grad_norm": 3.0953612327575684, - "learning_rate": 4.508035856362383e-05, - "loss": 0.8239, - "step": 33390 - }, - { - "epoch": 0.29526688944288265, - "grad_norm": 2.6699650287628174, - "learning_rate": 4.507888517595196e-05, - "loss": 0.7155, - "step": 33400 - }, - { - "epoch": 0.2953552927031949, - "grad_norm": 3.6951022148132324, - "learning_rate": 4.5077411788280086e-05, - "loss": 0.8089, - "step": 33410 - }, - { - "epoch": 0.2954436959635071, - "grad_norm": 13.246560096740723, - "learning_rate": 4.5075938400608214e-05, - "loss": 0.8607, - "step": 33420 - }, - { - "epoch": 0.2955320992238194, - "grad_norm": 2.9321255683898926, - "learning_rate": 4.507446501293634e-05, - "loss": 0.6976, - "step": 33430 - }, - { - "epoch": 0.29562050248413163, - "grad_norm": 4.297573566436768, - "learning_rate": 4.507299162526448e-05, - "loss": 0.7081, - "step": 33440 - }, - { - "epoch": 0.29570890574444386, - "grad_norm": 1.6129165887832642, - "learning_rate": 4.5071518237592606e-05, - "loss": 0.7055, - "step": 33450 - }, - { - "epoch": 0.2957973090047561, - "grad_norm": 5.101630210876465, - "learning_rate": 4.5070044849920734e-05, - "loss": 0.7261, - "step": 33460 - }, - { - "epoch": 0.2958857122650683, - "grad_norm": 2.3036487102508545, - "learning_rate": 4.506857146224886e-05, - "loss": 0.8177, - "step": 33470 - }, - { - "epoch": 0.29597411552538055, - "grad_norm": 10.941174507141113, - "learning_rate": 4.506709807457699e-05, - "loss": 0.7958, - "step": 33480 - }, - { - "epoch": 0.29606251878569284, - "grad_norm": 5.281665802001953, - "learning_rate": 4.506562468690512e-05, - "loss": 0.6946, - "step": 33490 - }, - { - "epoch": 0.29615092204600507, - "grad_norm": 6.0411152839660645, - "learning_rate": 4.5064151299233254e-05, - "loss": 0.7124, - "step": 33500 - }, - { - "epoch": 0.2962393253063173, - "grad_norm": 2.2550160884857178, - "learning_rate": 4.506267791156138e-05, - "loss": 0.6775, - "step": 33510 - }, - { - "epoch": 0.29632772856662953, - "grad_norm": 1.734143614768982, - "learning_rate": 4.506120452388951e-05, - "loss": 0.6546, - "step": 33520 - }, - { - "epoch": 0.29641613182694176, - "grad_norm": 4.533917427062988, - "learning_rate": 4.505973113621764e-05, - "loss": 0.789, - "step": 33530 - }, - { - "epoch": 0.296504535087254, - "grad_norm": 2.692505359649658, - "learning_rate": 4.505825774854577e-05, - "loss": 0.84, - "step": 33540 - }, - { - "epoch": 0.2965929383475663, - "grad_norm": 4.516151428222656, - "learning_rate": 4.5056784360873896e-05, - "loss": 0.7122, - "step": 33550 - }, - { - "epoch": 0.2966813416078785, - "grad_norm": 3.1818645000457764, - "learning_rate": 4.5055310973202024e-05, - "loss": 0.821, - "step": 33560 - }, - { - "epoch": 0.29676974486819074, - "grad_norm": 2.779891014099121, - "learning_rate": 4.505383758553016e-05, - "loss": 0.7608, - "step": 33570 - }, - { - "epoch": 0.296858148128503, - "grad_norm": 6.305975437164307, - "learning_rate": 4.505236419785829e-05, - "loss": 0.8242, - "step": 33580 - }, - { - "epoch": 0.2969465513888152, - "grad_norm": 2.454766273498535, - "learning_rate": 4.5050890810186416e-05, - "loss": 0.7936, - "step": 33590 - }, - { - "epoch": 0.29703495464912744, - "grad_norm": 3.8064215183258057, - "learning_rate": 4.5049417422514545e-05, - "loss": 0.7833, - "step": 33600 - }, - { - "epoch": 0.2971233579094397, - "grad_norm": 7.597683429718018, - "learning_rate": 4.504794403484267e-05, - "loss": 0.7053, - "step": 33610 - }, - { - "epoch": 0.29721176116975195, - "grad_norm": 9.404048919677734, - "learning_rate": 4.50464706471708e-05, - "loss": 0.7911, - "step": 33620 - }, - { - "epoch": 0.2973001644300642, - "grad_norm": 5.486915111541748, - "learning_rate": 4.5044997259498936e-05, - "loss": 0.7071, - "step": 33630 - }, - { - "epoch": 0.2973885676903764, - "grad_norm": 6.558414459228516, - "learning_rate": 4.504352387182706e-05, - "loss": 0.7104, - "step": 33640 - }, - { - "epoch": 0.29747697095068865, - "grad_norm": 7.517357349395752, - "learning_rate": 4.504205048415519e-05, - "loss": 0.6982, - "step": 33650 - }, - { - "epoch": 0.2975653742110009, - "grad_norm": 4.337510585784912, - "learning_rate": 4.504057709648332e-05, - "loss": 0.7708, - "step": 33660 - }, - { - "epoch": 0.29765377747131316, - "grad_norm": 3.676593065261841, - "learning_rate": 4.503910370881145e-05, - "loss": 0.7596, - "step": 33670 - }, - { - "epoch": 0.2977421807316254, - "grad_norm": 5.71250581741333, - "learning_rate": 4.503763032113958e-05, - "loss": 0.7676, - "step": 33680 - }, - { - "epoch": 0.2978305839919376, - "grad_norm": 2.9847748279571533, - "learning_rate": 4.503615693346771e-05, - "loss": 0.7273, - "step": 33690 - }, - { - "epoch": 0.29791898725224986, - "grad_norm": 2.5993552207946777, - "learning_rate": 4.5034683545795835e-05, - "loss": 0.8274, - "step": 33700 - }, - { - "epoch": 0.2980073905125621, - "grad_norm": 2.1354825496673584, - "learning_rate": 4.503321015812397e-05, - "loss": 0.8048, - "step": 33710 - }, - { - "epoch": 0.2980957937728743, - "grad_norm": 6.078588962554932, - "learning_rate": 4.50317367704521e-05, - "loss": 0.7835, - "step": 33720 - }, - { - "epoch": 0.2981841970331866, - "grad_norm": 5.610902309417725, - "learning_rate": 4.503026338278023e-05, - "loss": 0.7088, - "step": 33730 - }, - { - "epoch": 0.29827260029349884, - "grad_norm": 1.3966268301010132, - "learning_rate": 4.5028789995108355e-05, - "loss": 0.6612, - "step": 33740 - }, - { - "epoch": 0.29836100355381107, - "grad_norm": 5.88723087310791, - "learning_rate": 4.502731660743649e-05, - "loss": 0.7465, - "step": 33750 - }, - { - "epoch": 0.2984494068141233, - "grad_norm": 2.5628538131713867, - "learning_rate": 4.502584321976461e-05, - "loss": 0.6704, - "step": 33760 - }, - { - "epoch": 0.29853781007443553, - "grad_norm": 5.098702907562256, - "learning_rate": 4.502436983209275e-05, - "loss": 0.7279, - "step": 33770 - }, - { - "epoch": 0.29862621333474776, - "grad_norm": 2.44887113571167, - "learning_rate": 4.502289644442087e-05, - "loss": 0.7042, - "step": 33780 - }, - { - "epoch": 0.29871461659506005, - "grad_norm": 4.624105930328369, - "learning_rate": 4.5021423056749004e-05, - "loss": 0.7581, - "step": 33790 - }, - { - "epoch": 0.2988030198553723, - "grad_norm": 5.336048126220703, - "learning_rate": 4.501994966907713e-05, - "loss": 0.6621, - "step": 33800 - }, - { - "epoch": 0.2988914231156845, - "grad_norm": 29.99491310119629, - "learning_rate": 4.501847628140526e-05, - "loss": 0.7816, - "step": 33810 - }, - { - "epoch": 0.29897982637599674, - "grad_norm": 1.6243245601654053, - "learning_rate": 4.501700289373339e-05, - "loss": 0.6673, - "step": 33820 - }, - { - "epoch": 0.29906822963630897, - "grad_norm": 5.088352680206299, - "learning_rate": 4.5015529506061524e-05, - "loss": 0.7942, - "step": 33830 - }, - { - "epoch": 0.29915663289662126, - "grad_norm": 7.78596830368042, - "learning_rate": 4.5014056118389645e-05, - "loss": 0.7443, - "step": 33840 - }, - { - "epoch": 0.2992450361569335, - "grad_norm": 3.9854860305786133, - "learning_rate": 4.501258273071778e-05, - "loss": 0.7123, - "step": 33850 - }, - { - "epoch": 0.2993334394172457, - "grad_norm": 7.866239547729492, - "learning_rate": 4.501110934304591e-05, - "loss": 0.8152, - "step": 33860 - }, - { - "epoch": 0.29942184267755795, - "grad_norm": 5.544895172119141, - "learning_rate": 4.500963595537404e-05, - "loss": 0.7945, - "step": 33870 - }, - { - "epoch": 0.2995102459378702, - "grad_norm": 2.7663767337799072, - "learning_rate": 4.5008162567702166e-05, - "loss": 0.785, - "step": 33880 - }, - { - "epoch": 0.2995986491981824, - "grad_norm": 7.807943344116211, - "learning_rate": 4.5006689180030294e-05, - "loss": 0.7619, - "step": 33890 - }, - { - "epoch": 0.2996870524584947, - "grad_norm": 5.797680854797363, - "learning_rate": 4.500521579235842e-05, - "loss": 0.7638, - "step": 33900 - }, - { - "epoch": 0.2997754557188069, - "grad_norm": 2.5137417316436768, - "learning_rate": 4.500374240468656e-05, - "loss": 0.7784, - "step": 33910 - }, - { - "epoch": 0.29986385897911916, - "grad_norm": 3.4669203758239746, - "learning_rate": 4.500226901701468e-05, - "loss": 0.7297, - "step": 33920 - }, - { - "epoch": 0.2999522622394314, - "grad_norm": 2.0675132274627686, - "learning_rate": 4.5000795629342814e-05, - "loss": 0.6618, - "step": 33930 - }, - { - "epoch": 0.3000406654997436, - "grad_norm": 5.007786750793457, - "learning_rate": 4.499932224167094e-05, - "loss": 0.6963, - "step": 33940 - }, - { - "epoch": 0.30012906876005585, - "grad_norm": 3.6443653106689453, - "learning_rate": 4.499784885399907e-05, - "loss": 0.6755, - "step": 33950 - }, - { - "epoch": 0.30021747202036814, - "grad_norm": 6.972654819488525, - "learning_rate": 4.49963754663272e-05, - "loss": 0.786, - "step": 33960 - }, - { - "epoch": 0.30030587528068037, - "grad_norm": 14.206008911132812, - "learning_rate": 4.4994902078655334e-05, - "loss": 0.7552, - "step": 33970 - }, - { - "epoch": 0.3003942785409926, - "grad_norm": 4.875217437744141, - "learning_rate": 4.4993428690983456e-05, - "loss": 0.7843, - "step": 33980 - }, - { - "epoch": 0.30048268180130483, - "grad_norm": 2.0247178077697754, - "learning_rate": 4.499195530331159e-05, - "loss": 0.77, - "step": 33990 - }, - { - "epoch": 0.30057108506161706, - "grad_norm": 5.592731952667236, - "learning_rate": 4.499048191563971e-05, - "loss": 0.7991, - "step": 34000 - }, - { - "epoch": 0.3006594883219293, - "grad_norm": 4.4301605224609375, - "learning_rate": 4.498900852796785e-05, - "loss": 0.6283, - "step": 34010 - }, - { - "epoch": 0.3007478915822416, - "grad_norm": 10.852659225463867, - "learning_rate": 4.4987535140295976e-05, - "loss": 0.8067, - "step": 34020 - }, - { - "epoch": 0.3008362948425538, - "grad_norm": 4.8646016120910645, - "learning_rate": 4.4986061752624105e-05, - "loss": 0.6956, - "step": 34030 - }, - { - "epoch": 0.30092469810286604, - "grad_norm": 2.4376742839813232, - "learning_rate": 4.498458836495223e-05, - "loss": 0.6968, - "step": 34040 - }, - { - "epoch": 0.30101310136317827, - "grad_norm": 5.311902046203613, - "learning_rate": 4.498311497728037e-05, - "loss": 0.6929, - "step": 34050 - }, - { - "epoch": 0.3011015046234905, - "grad_norm": 2.5958940982818604, - "learning_rate": 4.498164158960849e-05, - "loss": 0.8462, - "step": 34060 - }, - { - "epoch": 0.30118990788380273, - "grad_norm": 3.115326404571533, - "learning_rate": 4.4980168201936625e-05, - "loss": 0.7645, - "step": 34070 - }, - { - "epoch": 0.301278311144115, - "grad_norm": 2.6564133167266846, - "learning_rate": 4.497869481426475e-05, - "loss": 0.7752, - "step": 34080 - }, - { - "epoch": 0.30136671440442725, - "grad_norm": 2.4497995376586914, - "learning_rate": 4.497722142659288e-05, - "loss": 0.6383, - "step": 34090 - }, - { - "epoch": 0.3014551176647395, - "grad_norm": 1.471512794494629, - "learning_rate": 4.497574803892101e-05, - "loss": 0.7211, - "step": 34100 - }, - { - "epoch": 0.3015435209250517, - "grad_norm": 6.132297992706299, - "learning_rate": 4.497427465124914e-05, - "loss": 0.8168, - "step": 34110 - }, - { - "epoch": 0.30163192418536394, - "grad_norm": 6.400779724121094, - "learning_rate": 4.4972801263577266e-05, - "loss": 0.5699, - "step": 34120 - }, - { - "epoch": 0.3017203274456762, - "grad_norm": 3.533576488494873, - "learning_rate": 4.49713278759054e-05, - "loss": 0.729, - "step": 34130 - }, - { - "epoch": 0.30180873070598846, - "grad_norm": 5.203378677368164, - "learning_rate": 4.496985448823352e-05, - "loss": 0.7799, - "step": 34140 - }, - { - "epoch": 0.3018971339663007, - "grad_norm": 5.792718410491943, - "learning_rate": 4.496838110056166e-05, - "loss": 0.7584, - "step": 34150 - }, - { - "epoch": 0.3019855372266129, - "grad_norm": 9.333540916442871, - "learning_rate": 4.496690771288979e-05, - "loss": 0.5008, - "step": 34160 - }, - { - "epoch": 0.30207394048692515, - "grad_norm": 2.0677549839019775, - "learning_rate": 4.4965434325217915e-05, - "loss": 0.8411, - "step": 34170 - }, - { - "epoch": 0.3021623437472374, - "grad_norm": 3.623750925064087, - "learning_rate": 4.496396093754604e-05, - "loss": 0.7233, - "step": 34180 - }, - { - "epoch": 0.3022507470075496, - "grad_norm": 3.4018473625183105, - "learning_rate": 4.496248754987418e-05, - "loss": 0.6843, - "step": 34190 - }, - { - "epoch": 0.3023391502678619, - "grad_norm": 7.940188407897949, - "learning_rate": 4.49610141622023e-05, - "loss": 0.8278, - "step": 34200 - }, - { - "epoch": 0.30242755352817413, - "grad_norm": 4.774045944213867, - "learning_rate": 4.4959540774530435e-05, - "loss": 0.6959, - "step": 34210 - }, - { - "epoch": 0.30251595678848636, - "grad_norm": 1.9966486692428589, - "learning_rate": 4.4958067386858564e-05, - "loss": 0.6778, - "step": 34220 - }, - { - "epoch": 0.3026043600487986, - "grad_norm": 3.796740770339966, - "learning_rate": 4.495659399918669e-05, - "loss": 0.8041, - "step": 34230 - }, - { - "epoch": 0.3026927633091108, - "grad_norm": 9.700430870056152, - "learning_rate": 4.495512061151482e-05, - "loss": 0.8319, - "step": 34240 - }, - { - "epoch": 0.30278116656942305, - "grad_norm": 2.1992483139038086, - "learning_rate": 4.495364722384295e-05, - "loss": 0.8348, - "step": 34250 - }, - { - "epoch": 0.30286956982973534, - "grad_norm": 2.6925811767578125, - "learning_rate": 4.495217383617108e-05, - "loss": 0.7807, - "step": 34260 - }, - { - "epoch": 0.30295797309004757, - "grad_norm": 4.500965595245361, - "learning_rate": 4.495070044849921e-05, - "loss": 0.7108, - "step": 34270 - }, - { - "epoch": 0.3030463763503598, - "grad_norm": 5.057172775268555, - "learning_rate": 4.4949227060827334e-05, - "loss": 0.8005, - "step": 34280 - }, - { - "epoch": 0.30313477961067203, - "grad_norm": 5.226869583129883, - "learning_rate": 4.494775367315547e-05, - "loss": 0.5941, - "step": 34290 - }, - { - "epoch": 0.30322318287098426, - "grad_norm": 2.1849160194396973, - "learning_rate": 4.49462802854836e-05, - "loss": 0.7457, - "step": 34300 - }, - { - "epoch": 0.3033115861312965, - "grad_norm": 3.0871520042419434, - "learning_rate": 4.4944806897811726e-05, - "loss": 0.7671, - "step": 34310 - }, - { - "epoch": 0.3033999893916088, - "grad_norm": 4.777992248535156, - "learning_rate": 4.4943333510139854e-05, - "loss": 0.7081, - "step": 34320 - }, - { - "epoch": 0.303488392651921, - "grad_norm": 1.8558039665222168, - "learning_rate": 4.494186012246799e-05, - "loss": 0.6043, - "step": 34330 - }, - { - "epoch": 0.30357679591223324, - "grad_norm": 11.023305892944336, - "learning_rate": 4.494038673479611e-05, - "loss": 0.8404, - "step": 34340 - }, - { - "epoch": 0.3036651991725455, - "grad_norm": 4.605042457580566, - "learning_rate": 4.4938913347124246e-05, - "loss": 0.7625, - "step": 34350 - }, - { - "epoch": 0.3037536024328577, - "grad_norm": 2.363144636154175, - "learning_rate": 4.4937439959452374e-05, - "loss": 0.6667, - "step": 34360 - }, - { - "epoch": 0.30384200569316994, - "grad_norm": 26.961488723754883, - "learning_rate": 4.49359665717805e-05, - "loss": 0.8653, - "step": 34370 - }, - { - "epoch": 0.3039304089534822, - "grad_norm": 2.402686595916748, - "learning_rate": 4.493449318410863e-05, - "loss": 0.6584, - "step": 34380 - }, - { - "epoch": 0.30401881221379445, - "grad_norm": 4.610472679138184, - "learning_rate": 4.493301979643676e-05, - "loss": 0.6445, - "step": 34390 - }, - { - "epoch": 0.3041072154741067, - "grad_norm": 1.5059280395507812, - "learning_rate": 4.493154640876489e-05, - "loss": 0.7671, - "step": 34400 - }, - { - "epoch": 0.3041956187344189, - "grad_norm": 3.2649242877960205, - "learning_rate": 4.493007302109302e-05, - "loss": 0.656, - "step": 34410 - }, - { - "epoch": 0.30428402199473115, - "grad_norm": 6.003420352935791, - "learning_rate": 4.492859963342115e-05, - "loss": 0.7645, - "step": 34420 - }, - { - "epoch": 0.30437242525504343, - "grad_norm": 6.091169834136963, - "learning_rate": 4.492712624574928e-05, - "loss": 0.7411, - "step": 34430 - }, - { - "epoch": 0.30446082851535566, - "grad_norm": 3.928722620010376, - "learning_rate": 4.492565285807741e-05, - "loss": 0.8999, - "step": 34440 - }, - { - "epoch": 0.3045492317756679, - "grad_norm": 4.167619228363037, - "learning_rate": 4.4924179470405536e-05, - "loss": 0.8574, - "step": 34450 - }, - { - "epoch": 0.3046376350359801, - "grad_norm": 2.832871675491333, - "learning_rate": 4.4922706082733664e-05, - "loss": 0.7686, - "step": 34460 - }, - { - "epoch": 0.30472603829629236, - "grad_norm": 2.916842222213745, - "learning_rate": 4.492123269506179e-05, - "loss": 0.6476, - "step": 34470 - }, - { - "epoch": 0.3048144415566046, - "grad_norm": 2.728239059448242, - "learning_rate": 4.491975930738993e-05, - "loss": 0.7421, - "step": 34480 - }, - { - "epoch": 0.3049028448169169, - "grad_norm": 7.834898471832275, - "learning_rate": 4.4918285919718056e-05, - "loss": 0.8405, - "step": 34490 - }, - { - "epoch": 0.3049912480772291, - "grad_norm": 3.9580485820770264, - "learning_rate": 4.4916812532046185e-05, - "loss": 0.7717, - "step": 34500 - }, - { - "epoch": 0.30507965133754134, - "grad_norm": 7.409971714019775, - "learning_rate": 4.491533914437431e-05, - "loss": 0.6351, - "step": 34510 - }, - { - "epoch": 0.30516805459785357, - "grad_norm": 9.813450813293457, - "learning_rate": 4.491386575670244e-05, - "loss": 0.625, - "step": 34520 - }, - { - "epoch": 0.3052564578581658, - "grad_norm": 4.363447666168213, - "learning_rate": 4.491239236903057e-05, - "loss": 0.8406, - "step": 34530 - }, - { - "epoch": 0.30534486111847803, - "grad_norm": 13.420183181762695, - "learning_rate": 4.4910918981358705e-05, - "loss": 0.6346, - "step": 34540 - }, - { - "epoch": 0.3054332643787903, - "grad_norm": 2.382375717163086, - "learning_rate": 4.490944559368683e-05, - "loss": 0.6244, - "step": 34550 - }, - { - "epoch": 0.30552166763910255, - "grad_norm": 3.6090288162231445, - "learning_rate": 4.490797220601496e-05, - "loss": 0.7077, - "step": 34560 - }, - { - "epoch": 0.3056100708994148, - "grad_norm": 5.673571586608887, - "learning_rate": 4.490649881834309e-05, - "loss": 0.6857, - "step": 34570 - }, - { - "epoch": 0.305698474159727, - "grad_norm": 18.463489532470703, - "learning_rate": 4.490502543067122e-05, - "loss": 0.6403, - "step": 34580 - }, - { - "epoch": 0.30578687742003924, - "grad_norm": 3.3048393726348877, - "learning_rate": 4.4903552042999347e-05, - "loss": 0.7948, - "step": 34590 - }, - { - "epoch": 0.30587528068035147, - "grad_norm": 4.157071113586426, - "learning_rate": 4.490207865532748e-05, - "loss": 0.7781, - "step": 34600 - }, - { - "epoch": 0.30596368394066376, - "grad_norm": 3.253995180130005, - "learning_rate": 4.49006052676556e-05, - "loss": 0.7411, - "step": 34610 - }, - { - "epoch": 0.306052087200976, - "grad_norm": 7.761780261993408, - "learning_rate": 4.489913187998374e-05, - "loss": 0.7864, - "step": 34620 - }, - { - "epoch": 0.3061404904612882, - "grad_norm": 5.318267822265625, - "learning_rate": 4.489765849231187e-05, - "loss": 0.8214, - "step": 34630 - }, - { - "epoch": 0.30622889372160045, - "grad_norm": 1.8405423164367676, - "learning_rate": 4.4896185104639995e-05, - "loss": 0.6918, - "step": 34640 - }, - { - "epoch": 0.3063172969819127, - "grad_norm": 3.742891788482666, - "learning_rate": 4.4894711716968123e-05, - "loss": 0.7683, - "step": 34650 - }, - { - "epoch": 0.3064057002422249, - "grad_norm": 7.632383346557617, - "learning_rate": 4.489323832929626e-05, - "loss": 0.8693, - "step": 34660 - }, - { - "epoch": 0.3064941035025372, - "grad_norm": 4.455667972564697, - "learning_rate": 4.489176494162438e-05, - "loss": 0.7577, - "step": 34670 - }, - { - "epoch": 0.3065825067628494, - "grad_norm": 7.343852996826172, - "learning_rate": 4.4890291553952515e-05, - "loss": 0.6562, - "step": 34680 - }, - { - "epoch": 0.30667091002316166, - "grad_norm": 3.7397708892822266, - "learning_rate": 4.4888818166280644e-05, - "loss": 0.7118, - "step": 34690 - }, - { - "epoch": 0.3067593132834739, - "grad_norm": 2.8678243160247803, - "learning_rate": 4.488734477860877e-05, - "loss": 0.6978, - "step": 34700 - }, - { - "epoch": 0.3068477165437861, - "grad_norm": 5.855990409851074, - "learning_rate": 4.48858713909369e-05, - "loss": 0.6802, - "step": 34710 - }, - { - "epoch": 0.30693611980409835, - "grad_norm": 4.8320112228393555, - "learning_rate": 4.488439800326503e-05, - "loss": 0.6886, - "step": 34720 - }, - { - "epoch": 0.30702452306441064, - "grad_norm": 4.854421138763428, - "learning_rate": 4.488292461559316e-05, - "loss": 0.8148, - "step": 34730 - }, - { - "epoch": 0.30711292632472287, - "grad_norm": 4.3127055168151855, - "learning_rate": 4.488145122792129e-05, - "loss": 0.8299, - "step": 34740 - }, - { - "epoch": 0.3072013295850351, - "grad_norm": 3.706496477127075, - "learning_rate": 4.4879977840249414e-05, - "loss": 0.6845, - "step": 34750 - }, - { - "epoch": 0.30728973284534733, - "grad_norm": 3.475883960723877, - "learning_rate": 4.487850445257755e-05, - "loss": 0.7979, - "step": 34760 - }, - { - "epoch": 0.30737813610565956, - "grad_norm": 4.517330169677734, - "learning_rate": 4.487703106490568e-05, - "loss": 0.651, - "step": 34770 - }, - { - "epoch": 0.3074665393659718, - "grad_norm": 9.706879615783691, - "learning_rate": 4.4875557677233806e-05, - "loss": 0.8229, - "step": 34780 - }, - { - "epoch": 0.3075549426262841, - "grad_norm": 2.5399911403656006, - "learning_rate": 4.4874084289561934e-05, - "loss": 0.7164, - "step": 34790 - }, - { - "epoch": 0.3076433458865963, - "grad_norm": 8.67696762084961, - "learning_rate": 4.487261090189007e-05, - "loss": 0.7514, - "step": 34800 - }, - { - "epoch": 0.30773174914690854, - "grad_norm": 3.7912492752075195, - "learning_rate": 4.487113751421819e-05, - "loss": 0.8312, - "step": 34810 - }, - { - "epoch": 0.30782015240722077, - "grad_norm": 5.7773356437683105, - "learning_rate": 4.4869664126546326e-05, - "loss": 0.8123, - "step": 34820 - }, - { - "epoch": 0.307908555667533, - "grad_norm": 4.9688639640808105, - "learning_rate": 4.486819073887445e-05, - "loss": 0.7679, - "step": 34830 - }, - { - "epoch": 0.30799695892784523, - "grad_norm": 2.416210889816284, - "learning_rate": 4.486671735120258e-05, - "loss": 0.8568, - "step": 34840 - }, - { - "epoch": 0.3080853621881575, - "grad_norm": 5.57459831237793, - "learning_rate": 4.486524396353071e-05, - "loss": 0.621, - "step": 34850 - }, - { - "epoch": 0.30817376544846975, - "grad_norm": 8.48627758026123, - "learning_rate": 4.486377057585884e-05, - "loss": 0.7955, - "step": 34860 - }, - { - "epoch": 0.308262168708782, - "grad_norm": 3.1546690464019775, - "learning_rate": 4.486229718818697e-05, - "loss": 0.8805, - "step": 34870 - }, - { - "epoch": 0.3083505719690942, - "grad_norm": 5.674517631530762, - "learning_rate": 4.48608238005151e-05, - "loss": 0.7785, - "step": 34880 - }, - { - "epoch": 0.30843897522940644, - "grad_norm": 4.3803815841674805, - "learning_rate": 4.4859350412843224e-05, - "loss": 0.7112, - "step": 34890 - }, - { - "epoch": 0.3085273784897187, - "grad_norm": 3.8419272899627686, - "learning_rate": 4.485787702517136e-05, - "loss": 0.787, - "step": 34900 - }, - { - "epoch": 0.30861578175003096, - "grad_norm": 2.115967273712158, - "learning_rate": 4.485640363749949e-05, - "loss": 0.7202, - "step": 34910 - }, - { - "epoch": 0.3087041850103432, - "grad_norm": 2.223862409591675, - "learning_rate": 4.4854930249827616e-05, - "loss": 0.6737, - "step": 34920 - }, - { - "epoch": 0.3087925882706554, - "grad_norm": 3.2410547733306885, - "learning_rate": 4.4853456862155744e-05, - "loss": 0.8091, - "step": 34930 - }, - { - "epoch": 0.30888099153096765, - "grad_norm": 5.367846488952637, - "learning_rate": 4.485198347448387e-05, - "loss": 0.7883, - "step": 34940 - }, - { - "epoch": 0.3089693947912799, - "grad_norm": 4.547714710235596, - "learning_rate": 4.4850510086812e-05, - "loss": 0.7055, - "step": 34950 - }, - { - "epoch": 0.30905779805159217, - "grad_norm": 5.68997049331665, - "learning_rate": 4.4849036699140136e-05, - "loss": 0.6594, - "step": 34960 - }, - { - "epoch": 0.3091462013119044, - "grad_norm": 2.666247844696045, - "learning_rate": 4.484756331146826e-05, - "loss": 0.7091, - "step": 34970 - }, - { - "epoch": 0.30923460457221663, - "grad_norm": 5.0559983253479, - "learning_rate": 4.484608992379639e-05, - "loss": 0.7026, - "step": 34980 - }, - { - "epoch": 0.30932300783252886, - "grad_norm": 1.8135817050933838, - "learning_rate": 4.484461653612452e-05, - "loss": 0.6423, - "step": 34990 - }, - { - "epoch": 0.3094114110928411, - "grad_norm": 7.472788333892822, - "learning_rate": 4.484314314845265e-05, - "loss": 0.6638, - "step": 35000 - }, - { - "epoch": 0.3094998143531533, - "grad_norm": 3.6537375450134277, - "learning_rate": 4.484166976078078e-05, - "loss": 0.7728, - "step": 35010 - }, - { - "epoch": 0.3095882176134656, - "grad_norm": 4.108083724975586, - "learning_rate": 4.484019637310891e-05, - "loss": 0.7638, - "step": 35020 - }, - { - "epoch": 0.30967662087377784, - "grad_norm": 3.3913795948028564, - "learning_rate": 4.4838722985437035e-05, - "loss": 0.6888, - "step": 35030 - }, - { - "epoch": 0.3097650241340901, - "grad_norm": 6.567787170410156, - "learning_rate": 4.483724959776517e-05, - "loss": 0.6654, - "step": 35040 - }, - { - "epoch": 0.3098534273944023, - "grad_norm": 5.884479522705078, - "learning_rate": 4.483577621009329e-05, - "loss": 0.7992, - "step": 35050 - }, - { - "epoch": 0.30994183065471453, - "grad_norm": 2.6067841053009033, - "learning_rate": 4.483430282242143e-05, - "loss": 0.8537, - "step": 35060 - }, - { - "epoch": 0.31003023391502676, - "grad_norm": 1.8995705842971802, - "learning_rate": 4.4832829434749555e-05, - "loss": 0.6798, - "step": 35070 - }, - { - "epoch": 0.31011863717533905, - "grad_norm": 6.674327850341797, - "learning_rate": 4.483135604707768e-05, - "loss": 0.6286, - "step": 35080 - }, - { - "epoch": 0.3102070404356513, - "grad_norm": 2.3585987091064453, - "learning_rate": 4.482988265940581e-05, - "loss": 0.8528, - "step": 35090 - }, - { - "epoch": 0.3102954436959635, - "grad_norm": 2.9615836143493652, - "learning_rate": 4.482840927173395e-05, - "loss": 0.7392, - "step": 35100 - }, - { - "epoch": 0.31038384695627574, - "grad_norm": 10.153748512268066, - "learning_rate": 4.482693588406207e-05, - "loss": 0.7166, - "step": 35110 - }, - { - "epoch": 0.310472250216588, - "grad_norm": 2.615586042404175, - "learning_rate": 4.4825462496390204e-05, - "loss": 0.7242, - "step": 35120 - }, - { - "epoch": 0.3105606534769002, - "grad_norm": 9.03085708618164, - "learning_rate": 4.482398910871833e-05, - "loss": 0.6931, - "step": 35130 - }, - { - "epoch": 0.3106490567372125, - "grad_norm": 2.5908732414245605, - "learning_rate": 4.482251572104646e-05, - "loss": 0.8251, - "step": 35140 - }, - { - "epoch": 0.3107374599975247, - "grad_norm": 2.6006767749786377, - "learning_rate": 4.482104233337459e-05, - "loss": 0.7147, - "step": 35150 - }, - { - "epoch": 0.31082586325783695, - "grad_norm": 3.929838180541992, - "learning_rate": 4.4819568945702724e-05, - "loss": 0.7676, - "step": 35160 - }, - { - "epoch": 0.3109142665181492, - "grad_norm": 4.024363040924072, - "learning_rate": 4.4818095558030845e-05, - "loss": 0.8565, - "step": 35170 - }, - { - "epoch": 0.3110026697784614, - "grad_norm": 3.61643385887146, - "learning_rate": 4.481662217035898e-05, - "loss": 0.7725, - "step": 35180 - }, - { - "epoch": 0.31109107303877365, - "grad_norm": 11.784936904907227, - "learning_rate": 4.48151487826871e-05, - "loss": 0.76, - "step": 35190 - }, - { - "epoch": 0.31117947629908593, - "grad_norm": 3.1878347396850586, - "learning_rate": 4.481367539501524e-05, - "loss": 0.8009, - "step": 35200 - }, - { - "epoch": 0.31126787955939816, - "grad_norm": 1.882789969444275, - "learning_rate": 4.4812202007343365e-05, - "loss": 0.7287, - "step": 35210 - }, - { - "epoch": 0.3113562828197104, - "grad_norm": 3.1435670852661133, - "learning_rate": 4.4810728619671494e-05, - "loss": 0.6436, - "step": 35220 - }, - { - "epoch": 0.3114446860800226, - "grad_norm": 7.801337718963623, - "learning_rate": 4.480925523199962e-05, - "loss": 0.7324, - "step": 35230 - }, - { - "epoch": 0.31153308934033486, - "grad_norm": 3.7233738899230957, - "learning_rate": 4.480778184432776e-05, - "loss": 0.7875, - "step": 35240 - }, - { - "epoch": 0.3116214926006471, - "grad_norm": 7.069220066070557, - "learning_rate": 4.480630845665588e-05, - "loss": 0.7657, - "step": 35250 - }, - { - "epoch": 0.3117098958609594, - "grad_norm": 1.9100210666656494, - "learning_rate": 4.4804835068984014e-05, - "loss": 0.7953, - "step": 35260 - }, - { - "epoch": 0.3117982991212716, - "grad_norm": 3.015167713165283, - "learning_rate": 4.480336168131214e-05, - "loss": 0.8179, - "step": 35270 - }, - { - "epoch": 0.31188670238158384, - "grad_norm": 3.5229954719543457, - "learning_rate": 4.480188829364027e-05, - "loss": 0.8366, - "step": 35280 - }, - { - "epoch": 0.31197510564189607, - "grad_norm": 2.8934969902038574, - "learning_rate": 4.48004149059684e-05, - "loss": 0.6987, - "step": 35290 - }, - { - "epoch": 0.3120635089022083, - "grad_norm": 2.1959846019744873, - "learning_rate": 4.479894151829653e-05, - "loss": 0.7815, - "step": 35300 - }, - { - "epoch": 0.31215191216252053, - "grad_norm": 2.49710750579834, - "learning_rate": 4.4797468130624656e-05, - "loss": 0.6647, - "step": 35310 - }, - { - "epoch": 0.3122403154228328, - "grad_norm": 2.9548497200012207, - "learning_rate": 4.479599474295279e-05, - "loss": 0.7203, - "step": 35320 - }, - { - "epoch": 0.31232871868314505, - "grad_norm": 3.4954943656921387, - "learning_rate": 4.479452135528092e-05, - "loss": 0.7129, - "step": 35330 - }, - { - "epoch": 0.3124171219434573, - "grad_norm": 7.048424243927002, - "learning_rate": 4.479304796760905e-05, - "loss": 0.6283, - "step": 35340 - }, - { - "epoch": 0.3125055252037695, - "grad_norm": 1.6082574129104614, - "learning_rate": 4.4791574579937176e-05, - "loss": 0.77, - "step": 35350 - }, - { - "epoch": 0.31259392846408174, - "grad_norm": 9.34705924987793, - "learning_rate": 4.4790101192265304e-05, - "loss": 0.7601, - "step": 35360 - }, - { - "epoch": 0.31268233172439397, - "grad_norm": 10.420363426208496, - "learning_rate": 4.478862780459343e-05, - "loss": 0.7167, - "step": 35370 - }, - { - "epoch": 0.31277073498470626, - "grad_norm": 4.788998603820801, - "learning_rate": 4.478715441692157e-05, - "loss": 0.6051, - "step": 35380 - }, - { - "epoch": 0.3128591382450185, - "grad_norm": 3.5290868282318115, - "learning_rate": 4.4785681029249696e-05, - "loss": 0.76, - "step": 35390 - }, - { - "epoch": 0.3129475415053307, - "grad_norm": 7.129870414733887, - "learning_rate": 4.4784207641577825e-05, - "loss": 0.6829, - "step": 35400 - }, - { - "epoch": 0.31303594476564295, - "grad_norm": 2.5844061374664307, - "learning_rate": 4.478273425390595e-05, - "loss": 0.7663, - "step": 35410 - }, - { - "epoch": 0.3131243480259552, - "grad_norm": 2.622905969619751, - "learning_rate": 4.478126086623408e-05, - "loss": 0.7099, - "step": 35420 - }, - { - "epoch": 0.3132127512862674, - "grad_norm": 2.8615331649780273, - "learning_rate": 4.477978747856221e-05, - "loss": 0.7074, - "step": 35430 - }, - { - "epoch": 0.3133011545465797, - "grad_norm": 2.793071985244751, - "learning_rate": 4.477831409089034e-05, - "loss": 0.6503, - "step": 35440 - }, - { - "epoch": 0.3133895578068919, - "grad_norm": 3.404242992401123, - "learning_rate": 4.477684070321847e-05, - "loss": 0.6747, - "step": 35450 - }, - { - "epoch": 0.31347796106720416, - "grad_norm": 3.489241600036621, - "learning_rate": 4.47753673155466e-05, - "loss": 0.6033, - "step": 35460 - }, - { - "epoch": 0.3135663643275164, - "grad_norm": 10.060763359069824, - "learning_rate": 4.477389392787473e-05, - "loss": 0.729, - "step": 35470 - }, - { - "epoch": 0.3136547675878286, - "grad_norm": 6.695730209350586, - "learning_rate": 4.477242054020286e-05, - "loss": 0.6457, - "step": 35480 - }, - { - "epoch": 0.3137431708481409, - "grad_norm": 7.306407451629639, - "learning_rate": 4.4770947152530986e-05, - "loss": 0.7439, - "step": 35490 - }, - { - "epoch": 0.31383157410845314, - "grad_norm": 10.030495643615723, - "learning_rate": 4.4769473764859115e-05, - "loss": 0.6102, - "step": 35500 - }, - { - "epoch": 0.31391997736876537, - "grad_norm": 1.6690642833709717, - "learning_rate": 4.476800037718725e-05, - "loss": 0.6725, - "step": 35510 - }, - { - "epoch": 0.3140083806290776, - "grad_norm": 4.747469425201416, - "learning_rate": 4.476652698951537e-05, - "loss": 0.7041, - "step": 35520 - }, - { - "epoch": 0.31409678388938983, - "grad_norm": 2.086782455444336, - "learning_rate": 4.476505360184351e-05, - "loss": 0.7764, - "step": 35530 - }, - { - "epoch": 0.31418518714970206, - "grad_norm": 2.241271495819092, - "learning_rate": 4.4763580214171635e-05, - "loss": 0.6617, - "step": 35540 - }, - { - "epoch": 0.31427359041001435, - "grad_norm": 4.012430191040039, - "learning_rate": 4.4762106826499763e-05, - "loss": 0.7873, - "step": 35550 - }, - { - "epoch": 0.3143619936703266, - "grad_norm": 12.640499114990234, - "learning_rate": 4.476063343882789e-05, - "loss": 0.7215, - "step": 35560 - }, - { - "epoch": 0.3144503969306388, - "grad_norm": 5.47008752822876, - "learning_rate": 4.475916005115603e-05, - "loss": 0.7184, - "step": 35570 - }, - { - "epoch": 0.31453880019095104, - "grad_norm": 4.258671283721924, - "learning_rate": 4.475768666348415e-05, - "loss": 0.7131, - "step": 35580 - }, - { - "epoch": 0.31462720345126327, - "grad_norm": 10.900259971618652, - "learning_rate": 4.4756213275812284e-05, - "loss": 0.6627, - "step": 35590 - }, - { - "epoch": 0.3147156067115755, - "grad_norm": 7.8930983543396, - "learning_rate": 4.475473988814041e-05, - "loss": 0.7873, - "step": 35600 - }, - { - "epoch": 0.3148040099718878, - "grad_norm": 1.966615915298462, - "learning_rate": 4.475326650046854e-05, - "loss": 0.7197, - "step": 35610 - }, - { - "epoch": 0.3148924132322, - "grad_norm": 3.239475965499878, - "learning_rate": 4.475179311279667e-05, - "loss": 0.826, - "step": 35620 - }, - { - "epoch": 0.31498081649251225, - "grad_norm": 8.250494956970215, - "learning_rate": 4.4750319725124804e-05, - "loss": 0.7778, - "step": 35630 - }, - { - "epoch": 0.3150692197528245, - "grad_norm": 2.4526751041412354, - "learning_rate": 4.4748846337452925e-05, - "loss": 0.6566, - "step": 35640 - }, - { - "epoch": 0.3151576230131367, - "grad_norm": 7.8469109535217285, - "learning_rate": 4.474737294978106e-05, - "loss": 0.6996, - "step": 35650 - }, - { - "epoch": 0.31524602627344894, - "grad_norm": 2.9463260173797607, - "learning_rate": 4.474589956210918e-05, - "loss": 0.6945, - "step": 35660 - }, - { - "epoch": 0.31533442953376123, - "grad_norm": 4.764516353607178, - "learning_rate": 4.474442617443732e-05, - "loss": 0.7386, - "step": 35670 - }, - { - "epoch": 0.31542283279407346, - "grad_norm": 8.156478881835938, - "learning_rate": 4.4742952786765446e-05, - "loss": 0.6639, - "step": 35680 - }, - { - "epoch": 0.3155112360543857, - "grad_norm": 6.431391716003418, - "learning_rate": 4.4741479399093574e-05, - "loss": 0.8414, - "step": 35690 - }, - { - "epoch": 0.3155996393146979, - "grad_norm": 2.5973117351531982, - "learning_rate": 4.47400060114217e-05, - "loss": 0.7268, - "step": 35700 - }, - { - "epoch": 0.31568804257501015, - "grad_norm": 2.5390853881835938, - "learning_rate": 4.473853262374984e-05, - "loss": 0.9046, - "step": 35710 - }, - { - "epoch": 0.3157764458353224, - "grad_norm": 2.5553884506225586, - "learning_rate": 4.473705923607796e-05, - "loss": 0.7369, - "step": 35720 - }, - { - "epoch": 0.31586484909563467, - "grad_norm": 5.37067985534668, - "learning_rate": 4.4735585848406094e-05, - "loss": 0.7933, - "step": 35730 - }, - { - "epoch": 0.3159532523559469, - "grad_norm": 2.2314293384552, - "learning_rate": 4.473411246073422e-05, - "loss": 0.7073, - "step": 35740 - }, - { - "epoch": 0.31604165561625913, - "grad_norm": 4.727914810180664, - "learning_rate": 4.473263907306235e-05, - "loss": 0.6766, - "step": 35750 - }, - { - "epoch": 0.31613005887657136, - "grad_norm": 3.5170843601226807, - "learning_rate": 4.473116568539048e-05, - "loss": 0.9076, - "step": 35760 - }, - { - "epoch": 0.3162184621368836, - "grad_norm": 3.3607208728790283, - "learning_rate": 4.472969229771861e-05, - "loss": 0.6969, - "step": 35770 - }, - { - "epoch": 0.3163068653971958, - "grad_norm": 3.736297607421875, - "learning_rate": 4.4728218910046736e-05, - "loss": 0.6165, - "step": 35780 - }, - { - "epoch": 0.3163952686575081, - "grad_norm": 2.551983594894409, - "learning_rate": 4.472674552237487e-05, - "loss": 0.6292, - "step": 35790 - }, - { - "epoch": 0.31648367191782034, - "grad_norm": 7.082693099975586, - "learning_rate": 4.472527213470299e-05, - "loss": 0.8233, - "step": 35800 - }, - { - "epoch": 0.3165720751781326, - "grad_norm": 1.9834364652633667, - "learning_rate": 4.472379874703113e-05, - "loss": 0.6516, - "step": 35810 - }, - { - "epoch": 0.3166604784384448, - "grad_norm": 3.8084793090820312, - "learning_rate": 4.4722325359359256e-05, - "loss": 0.7678, - "step": 35820 - }, - { - "epoch": 0.31674888169875703, - "grad_norm": 5.054144859313965, - "learning_rate": 4.4720851971687384e-05, - "loss": 0.7355, - "step": 35830 - }, - { - "epoch": 0.31683728495906927, - "grad_norm": 3.4051175117492676, - "learning_rate": 4.471937858401551e-05, - "loss": 0.6345, - "step": 35840 - }, - { - "epoch": 0.31692568821938155, - "grad_norm": 5.139364242553711, - "learning_rate": 4.471790519634365e-05, - "loss": 0.7938, - "step": 35850 - }, - { - "epoch": 0.3170140914796938, - "grad_norm": 4.080883502960205, - "learning_rate": 4.471643180867177e-05, - "loss": 0.7778, - "step": 35860 - }, - { - "epoch": 0.317102494740006, - "grad_norm": 3.567744016647339, - "learning_rate": 4.4714958420999905e-05, - "loss": 0.672, - "step": 35870 - }, - { - "epoch": 0.31719089800031824, - "grad_norm": 6.6573286056518555, - "learning_rate": 4.4713485033328026e-05, - "loss": 0.7922, - "step": 35880 - }, - { - "epoch": 0.3172793012606305, - "grad_norm": 3.859511375427246, - "learning_rate": 4.471201164565616e-05, - "loss": 0.8442, - "step": 35890 - }, - { - "epoch": 0.3173677045209427, - "grad_norm": 1.7332161664962769, - "learning_rate": 4.471053825798429e-05, - "loss": 0.6553, - "step": 35900 - }, - { - "epoch": 0.317456107781255, - "grad_norm": 1.9279416799545288, - "learning_rate": 4.470906487031242e-05, - "loss": 0.772, - "step": 35910 - }, - { - "epoch": 0.3175445110415672, - "grad_norm": 1.5226647853851318, - "learning_rate": 4.4707591482640546e-05, - "loss": 0.7767, - "step": 35920 - }, - { - "epoch": 0.31763291430187945, - "grad_norm": 3.45474910736084, - "learning_rate": 4.470611809496868e-05, - "loss": 0.728, - "step": 35930 - }, - { - "epoch": 0.3177213175621917, - "grad_norm": 5.9742913246154785, - "learning_rate": 4.47046447072968e-05, - "loss": 0.8092, - "step": 35940 - }, - { - "epoch": 0.3178097208225039, - "grad_norm": 1.7873629331588745, - "learning_rate": 4.470317131962494e-05, - "loss": 0.6293, - "step": 35950 - }, - { - "epoch": 0.31789812408281615, - "grad_norm": 2.174562454223633, - "learning_rate": 4.4701697931953067e-05, - "loss": 0.7105, - "step": 35960 - }, - { - "epoch": 0.31798652734312843, - "grad_norm": 4.899603843688965, - "learning_rate": 4.4700224544281195e-05, - "loss": 0.7141, - "step": 35970 - }, - { - "epoch": 0.31807493060344066, - "grad_norm": 10.002727508544922, - "learning_rate": 4.469875115660932e-05, - "loss": 0.8434, - "step": 35980 - }, - { - "epoch": 0.3181633338637529, - "grad_norm": 4.238052845001221, - "learning_rate": 4.469727776893745e-05, - "loss": 0.7131, - "step": 35990 - }, - { - "epoch": 0.3182517371240651, - "grad_norm": 3.539936065673828, - "learning_rate": 4.469580438126558e-05, - "loss": 0.8386, - "step": 36000 - }, - { - "epoch": 0.31834014038437736, - "grad_norm": 12.063549041748047, - "learning_rate": 4.4694330993593715e-05, - "loss": 0.6592, - "step": 36010 - }, - { - "epoch": 0.31842854364468964, - "grad_norm": 2.9906394481658936, - "learning_rate": 4.469285760592184e-05, - "loss": 0.7182, - "step": 36020 - }, - { - "epoch": 0.3185169469050019, - "grad_norm": 1.3524519205093384, - "learning_rate": 4.469138421824997e-05, - "loss": 0.7373, - "step": 36030 - }, - { - "epoch": 0.3186053501653141, - "grad_norm": 1.7572444677352905, - "learning_rate": 4.46899108305781e-05, - "loss": 0.7276, - "step": 36040 - }, - { - "epoch": 0.31869375342562634, - "grad_norm": 5.897258281707764, - "learning_rate": 4.468843744290623e-05, - "loss": 0.7438, - "step": 36050 - }, - { - "epoch": 0.31878215668593857, - "grad_norm": 4.364428520202637, - "learning_rate": 4.468696405523436e-05, - "loss": 0.686, - "step": 36060 - }, - { - "epoch": 0.3188705599462508, - "grad_norm": 4.301984786987305, - "learning_rate": 4.468549066756249e-05, - "loss": 0.6266, - "step": 36070 - }, - { - "epoch": 0.3189589632065631, - "grad_norm": 2.856872081756592, - "learning_rate": 4.4684017279890614e-05, - "loss": 0.8214, - "step": 36080 - }, - { - "epoch": 0.3190473664668753, - "grad_norm": 2.6192708015441895, - "learning_rate": 4.468254389221875e-05, - "loss": 0.6855, - "step": 36090 - }, - { - "epoch": 0.31913576972718755, - "grad_norm": 2.8822391033172607, - "learning_rate": 4.468107050454688e-05, - "loss": 0.7082, - "step": 36100 - }, - { - "epoch": 0.3192241729874998, - "grad_norm": 19.16712760925293, - "learning_rate": 4.4679597116875005e-05, - "loss": 0.7894, - "step": 36110 - }, - { - "epoch": 0.319312576247812, - "grad_norm": 7.504942893981934, - "learning_rate": 4.4678123729203134e-05, - "loss": 0.7222, - "step": 36120 - }, - { - "epoch": 0.31940097950812424, - "grad_norm": 4.075170993804932, - "learning_rate": 4.467665034153126e-05, - "loss": 0.8002, - "step": 36130 - }, - { - "epoch": 0.3194893827684365, - "grad_norm": 2.7567331790924072, - "learning_rate": 4.467517695385939e-05, - "loss": 0.7948, - "step": 36140 - }, - { - "epoch": 0.31957778602874876, - "grad_norm": 1.9092066287994385, - "learning_rate": 4.4673703566187526e-05, - "loss": 0.6888, - "step": 36150 - }, - { - "epoch": 0.319666189289061, - "grad_norm": 7.898533821105957, - "learning_rate": 4.467223017851565e-05, - "loss": 0.7943, - "step": 36160 - }, - { - "epoch": 0.3197545925493732, - "grad_norm": 6.137923717498779, - "learning_rate": 4.467075679084378e-05, - "loss": 0.8553, - "step": 36170 - }, - { - "epoch": 0.31984299580968545, - "grad_norm": 2.9837148189544678, - "learning_rate": 4.466928340317191e-05, - "loss": 0.8759, - "step": 36180 - }, - { - "epoch": 0.3199313990699977, - "grad_norm": 3.2219789028167725, - "learning_rate": 4.466781001550004e-05, - "loss": 0.7754, - "step": 36190 - }, - { - "epoch": 0.32001980233030997, - "grad_norm": 6.509387493133545, - "learning_rate": 4.466633662782817e-05, - "loss": 0.6833, - "step": 36200 - }, - { - "epoch": 0.3201082055906222, - "grad_norm": 9.746037483215332, - "learning_rate": 4.46648632401563e-05, - "loss": 0.8367, - "step": 36210 - }, - { - "epoch": 0.3201966088509344, - "grad_norm": 4.755408763885498, - "learning_rate": 4.4663389852484424e-05, - "loss": 0.6208, - "step": 36220 - }, - { - "epoch": 0.32028501211124666, - "grad_norm": 3.9872827529907227, - "learning_rate": 4.466191646481256e-05, - "loss": 0.7711, - "step": 36230 - }, - { - "epoch": 0.3203734153715589, - "grad_norm": 3.089550256729126, - "learning_rate": 4.466044307714069e-05, - "loss": 0.7777, - "step": 36240 - }, - { - "epoch": 0.3204618186318711, - "grad_norm": 5.104980945587158, - "learning_rate": 4.4658969689468816e-05, - "loss": 0.6708, - "step": 36250 - }, - { - "epoch": 0.3205502218921834, - "grad_norm": 5.793055534362793, - "learning_rate": 4.4657496301796944e-05, - "loss": 0.778, - "step": 36260 - }, - { - "epoch": 0.32063862515249564, - "grad_norm": 7.498042583465576, - "learning_rate": 4.465602291412507e-05, - "loss": 0.64, - "step": 36270 - }, - { - "epoch": 0.32072702841280787, - "grad_norm": 1.810036540031433, - "learning_rate": 4.46545495264532e-05, - "loss": 0.6931, - "step": 36280 - }, - { - "epoch": 0.3208154316731201, - "grad_norm": 4.0473222732543945, - "learning_rate": 4.4653076138781336e-05, - "loss": 0.6462, - "step": 36290 - }, - { - "epoch": 0.32090383493343233, - "grad_norm": 3.327176570892334, - "learning_rate": 4.4651602751109464e-05, - "loss": 0.669, - "step": 36300 - }, - { - "epoch": 0.32099223819374456, - "grad_norm": 12.564557075500488, - "learning_rate": 4.465012936343759e-05, - "loss": 0.6947, - "step": 36310 - }, - { - "epoch": 0.32108064145405685, - "grad_norm": 3.4070346355438232, - "learning_rate": 4.464865597576572e-05, - "loss": 0.834, - "step": 36320 - }, - { - "epoch": 0.3211690447143691, - "grad_norm": 8.077347755432129, - "learning_rate": 4.464718258809385e-05, - "loss": 0.65, - "step": 36330 - }, - { - "epoch": 0.3212574479746813, - "grad_norm": 6.8531036376953125, - "learning_rate": 4.464570920042198e-05, - "loss": 0.7236, - "step": 36340 - }, - { - "epoch": 0.32134585123499354, - "grad_norm": 4.59869909286499, - "learning_rate": 4.4644235812750106e-05, - "loss": 0.6737, - "step": 36350 - }, - { - "epoch": 0.32143425449530577, - "grad_norm": 2.6326897144317627, - "learning_rate": 4.464276242507824e-05, - "loss": 0.7285, - "step": 36360 - }, - { - "epoch": 0.321522657755618, - "grad_norm": 7.368484973907471, - "learning_rate": 4.464128903740637e-05, - "loss": 0.9055, - "step": 36370 - }, - { - "epoch": 0.3216110610159303, - "grad_norm": 4.5146484375, - "learning_rate": 4.46398156497345e-05, - "loss": 0.7162, - "step": 36380 - }, - { - "epoch": 0.3216994642762425, - "grad_norm": 5.891968250274658, - "learning_rate": 4.4638342262062626e-05, - "loss": 0.7745, - "step": 36390 - }, - { - "epoch": 0.32178786753655475, - "grad_norm": 7.0796990394592285, - "learning_rate": 4.4636868874390755e-05, - "loss": 0.7461, - "step": 36400 - }, - { - "epoch": 0.321876270796867, - "grad_norm": 2.960014581680298, - "learning_rate": 4.463539548671888e-05, - "loss": 0.6261, - "step": 36410 - }, - { - "epoch": 0.3219646740571792, - "grad_norm": 3.8946077823638916, - "learning_rate": 4.463392209904702e-05, - "loss": 0.7359, - "step": 36420 - }, - { - "epoch": 0.32205307731749144, - "grad_norm": 3.6543290615081787, - "learning_rate": 4.463244871137515e-05, - "loss": 0.695, - "step": 36430 - }, - { - "epoch": 0.32214148057780373, - "grad_norm": 4.2082438468933105, - "learning_rate": 4.4630975323703275e-05, - "loss": 0.6462, - "step": 36440 - }, - { - "epoch": 0.32222988383811596, - "grad_norm": 7.0076751708984375, - "learning_rate": 4.46295019360314e-05, - "loss": 0.7556, - "step": 36450 - }, - { - "epoch": 0.3223182870984282, - "grad_norm": 5.183751106262207, - "learning_rate": 4.462802854835953e-05, - "loss": 0.6722, - "step": 36460 - }, - { - "epoch": 0.3224066903587404, - "grad_norm": 10.116753578186035, - "learning_rate": 4.462655516068766e-05, - "loss": 0.8298, - "step": 36470 - }, - { - "epoch": 0.32249509361905265, - "grad_norm": 5.947454929351807, - "learning_rate": 4.4625081773015795e-05, - "loss": 0.7069, - "step": 36480 - }, - { - "epoch": 0.3225834968793649, - "grad_norm": 5.148024082183838, - "learning_rate": 4.462360838534392e-05, - "loss": 0.67, - "step": 36490 - }, - { - "epoch": 0.32267190013967717, - "grad_norm": 2.5367794036865234, - "learning_rate": 4.462213499767205e-05, - "loss": 0.7776, - "step": 36500 - }, - { - "epoch": 0.3227603033999894, - "grad_norm": 3.567840099334717, - "learning_rate": 4.462066161000018e-05, - "loss": 0.8079, - "step": 36510 - }, - { - "epoch": 0.32284870666030163, - "grad_norm": 3.3447000980377197, - "learning_rate": 4.461918822232831e-05, - "loss": 0.8521, - "step": 36520 - }, - { - "epoch": 0.32293710992061386, - "grad_norm": 1.703671932220459, - "learning_rate": 4.461771483465644e-05, - "loss": 0.7592, - "step": 36530 - }, - { - "epoch": 0.3230255131809261, - "grad_norm": 5.826778888702393, - "learning_rate": 4.461624144698457e-05, - "loss": 0.6254, - "step": 36540 - }, - { - "epoch": 0.3231139164412384, - "grad_norm": 5.9275288581848145, - "learning_rate": 4.4614768059312694e-05, - "loss": 0.7534, - "step": 36550 - }, - { - "epoch": 0.3232023197015506, - "grad_norm": 2.486701250076294, - "learning_rate": 4.461329467164083e-05, - "loss": 0.8027, - "step": 36560 - }, - { - "epoch": 0.32329072296186284, - "grad_norm": 8.054226875305176, - "learning_rate": 4.461182128396896e-05, - "loss": 0.8708, - "step": 36570 - }, - { - "epoch": 0.3233791262221751, - "grad_norm": 2.529788017272949, - "learning_rate": 4.4610347896297085e-05, - "loss": 0.7099, - "step": 36580 - }, - { - "epoch": 0.3234675294824873, - "grad_norm": 2.2886850833892822, - "learning_rate": 4.4608874508625214e-05, - "loss": 0.7396, - "step": 36590 - }, - { - "epoch": 0.32355593274279953, - "grad_norm": 6.225261211395264, - "learning_rate": 4.460740112095334e-05, - "loss": 0.7366, - "step": 36600 - }, - { - "epoch": 0.3236443360031118, - "grad_norm": 7.799415111541748, - "learning_rate": 4.460592773328147e-05, - "loss": 0.6858, - "step": 36610 - }, - { - "epoch": 0.32373273926342405, - "grad_norm": 2.9057488441467285, - "learning_rate": 4.4604454345609606e-05, - "loss": 0.6897, - "step": 36620 - }, - { - "epoch": 0.3238211425237363, - "grad_norm": 3.1471447944641113, - "learning_rate": 4.460298095793773e-05, - "loss": 0.757, - "step": 36630 - }, - { - "epoch": 0.3239095457840485, - "grad_norm": 8.476110458374023, - "learning_rate": 4.460150757026586e-05, - "loss": 0.7871, - "step": 36640 - }, - { - "epoch": 0.32399794904436074, - "grad_norm": 3.5183098316192627, - "learning_rate": 4.460003418259399e-05, - "loss": 0.5878, - "step": 36650 - }, - { - "epoch": 0.324086352304673, - "grad_norm": 5.67738676071167, - "learning_rate": 4.459856079492212e-05, - "loss": 0.5429, - "step": 36660 - }, - { - "epoch": 0.32417475556498526, - "grad_norm": 5.265235424041748, - "learning_rate": 4.459708740725025e-05, - "loss": 0.7492, - "step": 36670 - }, - { - "epoch": 0.3242631588252975, - "grad_norm": 14.093735694885254, - "learning_rate": 4.459561401957838e-05, - "loss": 0.5584, - "step": 36680 - }, - { - "epoch": 0.3243515620856097, - "grad_norm": 5.544258117675781, - "learning_rate": 4.4594140631906504e-05, - "loss": 0.7839, - "step": 36690 - }, - { - "epoch": 0.32443996534592195, - "grad_norm": 4.524329662322998, - "learning_rate": 4.459266724423464e-05, - "loss": 0.6969, - "step": 36700 - }, - { - "epoch": 0.3245283686062342, - "grad_norm": 6.473782539367676, - "learning_rate": 4.459119385656276e-05, - "loss": 0.7758, - "step": 36710 - }, - { - "epoch": 0.3246167718665464, - "grad_norm": 3.0859789848327637, - "learning_rate": 4.4589720468890896e-05, - "loss": 0.6721, - "step": 36720 - }, - { - "epoch": 0.3247051751268587, - "grad_norm": 5.294314861297607, - "learning_rate": 4.4588247081219024e-05, - "loss": 0.8849, - "step": 36730 - }, - { - "epoch": 0.32479357838717093, - "grad_norm": 10.948575973510742, - "learning_rate": 4.458677369354715e-05, - "loss": 0.7714, - "step": 36740 - }, - { - "epoch": 0.32488198164748316, - "grad_norm": 4.2016754150390625, - "learning_rate": 4.458530030587528e-05, - "loss": 0.8192, - "step": 36750 - }, - { - "epoch": 0.3249703849077954, - "grad_norm": 4.120140552520752, - "learning_rate": 4.4583826918203416e-05, - "loss": 0.8306, - "step": 36760 - }, - { - "epoch": 0.3250587881681076, - "grad_norm": 5.85779333114624, - "learning_rate": 4.458235353053154e-05, - "loss": 0.928, - "step": 36770 - }, - { - "epoch": 0.32514719142841986, - "grad_norm": 4.28076696395874, - "learning_rate": 4.458088014285967e-05, - "loss": 0.9034, - "step": 36780 - }, - { - "epoch": 0.32523559468873214, - "grad_norm": 3.8169968128204346, - "learning_rate": 4.45794067551878e-05, - "loss": 0.7378, - "step": 36790 - }, - { - "epoch": 0.3253239979490444, - "grad_norm": 3.805772304534912, - "learning_rate": 4.457793336751593e-05, - "loss": 0.6729, - "step": 36800 - }, - { - "epoch": 0.3254124012093566, - "grad_norm": 2.670017957687378, - "learning_rate": 4.457645997984406e-05, - "loss": 0.696, - "step": 36810 - }, - { - "epoch": 0.32550080446966884, - "grad_norm": 9.231040954589844, - "learning_rate": 4.4574986592172186e-05, - "loss": 0.6923, - "step": 36820 - }, - { - "epoch": 0.32558920772998107, - "grad_norm": 3.8730318546295166, - "learning_rate": 4.4573513204500315e-05, - "loss": 0.7452, - "step": 36830 - }, - { - "epoch": 0.3256776109902933, - "grad_norm": 1.7324206829071045, - "learning_rate": 4.457203981682845e-05, - "loss": 0.7284, - "step": 36840 - }, - { - "epoch": 0.3257660142506056, - "grad_norm": 7.449343681335449, - "learning_rate": 4.457056642915657e-05, - "loss": 0.7128, - "step": 36850 - }, - { - "epoch": 0.3258544175109178, - "grad_norm": 2.6910338401794434, - "learning_rate": 4.4569093041484707e-05, - "loss": 0.6969, - "step": 36860 - }, - { - "epoch": 0.32594282077123005, - "grad_norm": 7.11206579208374, - "learning_rate": 4.4567619653812835e-05, - "loss": 0.7615, - "step": 36870 - }, - { - "epoch": 0.3260312240315423, - "grad_norm": 4.754086017608643, - "learning_rate": 4.456614626614096e-05, - "loss": 0.7397, - "step": 36880 - }, - { - "epoch": 0.3261196272918545, - "grad_norm": 3.5037434101104736, - "learning_rate": 4.456467287846909e-05, - "loss": 0.649, - "step": 36890 - }, - { - "epoch": 0.32620803055216674, - "grad_norm": 3.6783218383789062, - "learning_rate": 4.456319949079723e-05, - "loss": 0.8453, - "step": 36900 - }, - { - "epoch": 0.326296433812479, - "grad_norm": 3.4167351722717285, - "learning_rate": 4.456172610312535e-05, - "loss": 0.7621, - "step": 36910 - }, - { - "epoch": 0.32638483707279126, - "grad_norm": 2.969130039215088, - "learning_rate": 4.4560252715453483e-05, - "loss": 0.675, - "step": 36920 - }, - { - "epoch": 0.3264732403331035, - "grad_norm": 8.845534324645996, - "learning_rate": 4.455877932778161e-05, - "loss": 0.8149, - "step": 36930 - }, - { - "epoch": 0.3265616435934157, - "grad_norm": 2.8086657524108887, - "learning_rate": 4.455730594010974e-05, - "loss": 0.7067, - "step": 36940 - }, - { - "epoch": 0.32665004685372795, - "grad_norm": 8.169519424438477, - "learning_rate": 4.455583255243787e-05, - "loss": 0.7916, - "step": 36950 - }, - { - "epoch": 0.3267384501140402, - "grad_norm": 3.0278542041778564, - "learning_rate": 4.4554359164766e-05, - "loss": 0.6999, - "step": 36960 - }, - { - "epoch": 0.32682685337435247, - "grad_norm": 1.4574494361877441, - "learning_rate": 4.4552885777094125e-05, - "loss": 0.7723, - "step": 36970 - }, - { - "epoch": 0.3269152566346647, - "grad_norm": 5.4884114265441895, - "learning_rate": 4.455141238942226e-05, - "loss": 0.6939, - "step": 36980 - }, - { - "epoch": 0.32700365989497693, - "grad_norm": 6.8113532066345215, - "learning_rate": 4.454993900175038e-05, - "loss": 0.7398, - "step": 36990 - }, - { - "epoch": 0.32709206315528916, - "grad_norm": 4.3907928466796875, - "learning_rate": 4.454846561407852e-05, - "loss": 0.677, - "step": 37000 - }, - { - "epoch": 0.3271804664156014, - "grad_norm": 4.854316711425781, - "learning_rate": 4.4546992226406645e-05, - "loss": 0.7735, - "step": 37010 - }, - { - "epoch": 0.3272688696759136, - "grad_norm": 3.0051686763763428, - "learning_rate": 4.4545518838734774e-05, - "loss": 0.7206, - "step": 37020 - }, - { - "epoch": 0.3273572729362259, - "grad_norm": 4.15510892868042, - "learning_rate": 4.45440454510629e-05, - "loss": 0.7273, - "step": 37030 - }, - { - "epoch": 0.32744567619653814, - "grad_norm": 7.260164260864258, - "learning_rate": 4.454257206339104e-05, - "loss": 0.8417, - "step": 37040 - }, - { - "epoch": 0.32753407945685037, - "grad_norm": 4.777120590209961, - "learning_rate": 4.454109867571916e-05, - "loss": 0.6485, - "step": 37050 - }, - { - "epoch": 0.3276224827171626, - "grad_norm": 5.564696311950684, - "learning_rate": 4.4539625288047294e-05, - "loss": 0.8041, - "step": 37060 - }, - { - "epoch": 0.32771088597747483, - "grad_norm": 7.659509658813477, - "learning_rate": 4.4538151900375415e-05, - "loss": 0.8141, - "step": 37070 - }, - { - "epoch": 0.3277992892377871, - "grad_norm": 4.162317752838135, - "learning_rate": 4.453667851270355e-05, - "loss": 0.747, - "step": 37080 - }, - { - "epoch": 0.32788769249809935, - "grad_norm": 2.4380946159362793, - "learning_rate": 4.453520512503168e-05, - "loss": 0.7385, - "step": 37090 - }, - { - "epoch": 0.3279760957584116, - "grad_norm": 5.835511207580566, - "learning_rate": 4.453373173735981e-05, - "loss": 0.7104, - "step": 37100 - }, - { - "epoch": 0.3280644990187238, - "grad_norm": 3.2107927799224854, - "learning_rate": 4.4532258349687936e-05, - "loss": 0.8393, - "step": 37110 - }, - { - "epoch": 0.32815290227903604, - "grad_norm": 2.5637314319610596, - "learning_rate": 4.453078496201607e-05, - "loss": 0.7245, - "step": 37120 - }, - { - "epoch": 0.32824130553934827, - "grad_norm": 4.150752067565918, - "learning_rate": 4.452931157434419e-05, - "loss": 0.7199, - "step": 37130 - }, - { - "epoch": 0.32832970879966056, - "grad_norm": 7.983151912689209, - "learning_rate": 4.452783818667233e-05, - "loss": 0.6668, - "step": 37140 - }, - { - "epoch": 0.3284181120599728, - "grad_norm": 5.745832443237305, - "learning_rate": 4.4526364799000456e-05, - "loss": 0.71, - "step": 37150 - }, - { - "epoch": 0.328506515320285, - "grad_norm": 2.8052961826324463, - "learning_rate": 4.4524891411328584e-05, - "loss": 0.5905, - "step": 37160 - }, - { - "epoch": 0.32859491858059725, - "grad_norm": 7.838033676147461, - "learning_rate": 4.452341802365671e-05, - "loss": 0.6963, - "step": 37170 - }, - { - "epoch": 0.3286833218409095, - "grad_norm": 4.461402893066406, - "learning_rate": 4.452194463598484e-05, - "loss": 0.87, - "step": 37180 - }, - { - "epoch": 0.3287717251012217, - "grad_norm": 11.672638893127441, - "learning_rate": 4.452047124831297e-05, - "loss": 0.8347, - "step": 37190 - }, - { - "epoch": 0.328860128361534, - "grad_norm": 5.240647792816162, - "learning_rate": 4.4518997860641104e-05, - "loss": 0.8136, - "step": 37200 - }, - { - "epoch": 0.32894853162184623, - "grad_norm": 5.189323902130127, - "learning_rate": 4.451752447296923e-05, - "loss": 0.8399, - "step": 37210 - }, - { - "epoch": 0.32903693488215846, - "grad_norm": 6.071349620819092, - "learning_rate": 4.451605108529736e-05, - "loss": 0.7196, - "step": 37220 - }, - { - "epoch": 0.3291253381424707, - "grad_norm": 3.4593567848205566, - "learning_rate": 4.451457769762549e-05, - "loss": 0.7408, - "step": 37230 - }, - { - "epoch": 0.3292137414027829, - "grad_norm": 3.869748115539551, - "learning_rate": 4.451310430995362e-05, - "loss": 0.7996, - "step": 37240 - }, - { - "epoch": 0.32930214466309515, - "grad_norm": 2.687638998031616, - "learning_rate": 4.4511630922281746e-05, - "loss": 0.7685, - "step": 37250 - }, - { - "epoch": 0.32939054792340744, - "grad_norm": 3.2222094535827637, - "learning_rate": 4.451015753460988e-05, - "loss": 0.6959, - "step": 37260 - }, - { - "epoch": 0.32947895118371967, - "grad_norm": 5.09929895401001, - "learning_rate": 4.450868414693801e-05, - "loss": 0.6967, - "step": 37270 - }, - { - "epoch": 0.3295673544440319, - "grad_norm": 6.807804107666016, - "learning_rate": 4.450721075926614e-05, - "loss": 0.7079, - "step": 37280 - }, - { - "epoch": 0.32965575770434413, - "grad_norm": 6.794445037841797, - "learning_rate": 4.4505737371594266e-05, - "loss": 0.7696, - "step": 37290 - }, - { - "epoch": 0.32974416096465636, - "grad_norm": 2.0575544834136963, - "learning_rate": 4.4504263983922395e-05, - "loss": 0.7636, - "step": 37300 - }, - { - "epoch": 0.3298325642249686, - "grad_norm": 4.3312764167785645, - "learning_rate": 4.450279059625053e-05, - "loss": 0.8836, - "step": 37310 - }, - { - "epoch": 0.3299209674852809, - "grad_norm": 9.852190017700195, - "learning_rate": 4.450131720857865e-05, - "loss": 0.6994, - "step": 37320 - }, - { - "epoch": 0.3300093707455931, - "grad_norm": 3.0304930210113525, - "learning_rate": 4.4499843820906787e-05, - "loss": 0.7432, - "step": 37330 - }, - { - "epoch": 0.33009777400590534, - "grad_norm": 4.672139644622803, - "learning_rate": 4.4498370433234915e-05, - "loss": 0.6682, - "step": 37340 - }, - { - "epoch": 0.3301861772662176, - "grad_norm": 4.245362758636475, - "learning_rate": 4.449689704556304e-05, - "loss": 0.7603, - "step": 37350 - }, - { - "epoch": 0.3302745805265298, - "grad_norm": 2.399181365966797, - "learning_rate": 4.449542365789117e-05, - "loss": 0.7421, - "step": 37360 - }, - { - "epoch": 0.33036298378684203, - "grad_norm": 6.010427474975586, - "learning_rate": 4.449395027021931e-05, - "loss": 0.7896, - "step": 37370 - }, - { - "epoch": 0.3304513870471543, - "grad_norm": 4.387106895446777, - "learning_rate": 4.449247688254743e-05, - "loss": 0.7033, - "step": 37380 - }, - { - "epoch": 0.33053979030746655, - "grad_norm": 2.2987771034240723, - "learning_rate": 4.4491003494875563e-05, - "loss": 0.8088, - "step": 37390 - }, - { - "epoch": 0.3306281935677788, - "grad_norm": 5.155879497528076, - "learning_rate": 4.448953010720369e-05, - "loss": 0.7601, - "step": 37400 - }, - { - "epoch": 0.330716596828091, - "grad_norm": 3.248589515686035, - "learning_rate": 4.448805671953182e-05, - "loss": 0.7168, - "step": 37410 - }, - { - "epoch": 0.33080500008840324, - "grad_norm": 3.9455742835998535, - "learning_rate": 4.448658333185995e-05, - "loss": 0.7608, - "step": 37420 - }, - { - "epoch": 0.3308934033487155, - "grad_norm": 4.91671085357666, - "learning_rate": 4.448510994418808e-05, - "loss": 0.6567, - "step": 37430 - }, - { - "epoch": 0.33098180660902776, - "grad_norm": 2.7089884281158447, - "learning_rate": 4.4483636556516205e-05, - "loss": 0.6045, - "step": 37440 - }, - { - "epoch": 0.33107020986934, - "grad_norm": 4.3610687255859375, - "learning_rate": 4.448216316884434e-05, - "loss": 0.6921, - "step": 37450 - }, - { - "epoch": 0.3311586131296522, - "grad_norm": 5.27649450302124, - "learning_rate": 4.448068978117246e-05, - "loss": 0.6848, - "step": 37460 - }, - { - "epoch": 0.33124701638996445, - "grad_norm": 8.016512870788574, - "learning_rate": 4.44792163935006e-05, - "loss": 0.6426, - "step": 37470 - }, - { - "epoch": 0.3313354196502767, - "grad_norm": 2.9452693462371826, - "learning_rate": 4.4477743005828725e-05, - "loss": 0.7456, - "step": 37480 - }, - { - "epoch": 0.3314238229105889, - "grad_norm": 7.632255554199219, - "learning_rate": 4.4476269618156854e-05, - "loss": 0.6582, - "step": 37490 - }, - { - "epoch": 0.3315122261709012, - "grad_norm": 4.6061530113220215, - "learning_rate": 4.447479623048498e-05, - "loss": 0.7213, - "step": 37500 - }, - { - "epoch": 0.33160062943121343, - "grad_norm": 8.105106353759766, - "learning_rate": 4.447332284281312e-05, - "loss": 0.7712, - "step": 37510 - }, - { - "epoch": 0.33168903269152566, - "grad_norm": 13.633162498474121, - "learning_rate": 4.447184945514124e-05, - "loss": 0.7063, - "step": 37520 - }, - { - "epoch": 0.3317774359518379, - "grad_norm": 1.565040111541748, - "learning_rate": 4.4470376067469374e-05, - "loss": 0.6888, - "step": 37530 - }, - { - "epoch": 0.3318658392121501, - "grad_norm": 1.4071500301361084, - "learning_rate": 4.4468902679797496e-05, - "loss": 0.6259, - "step": 37540 - }, - { - "epoch": 0.33195424247246236, - "grad_norm": 3.1355643272399902, - "learning_rate": 4.446742929212563e-05, - "loss": 0.8225, - "step": 37550 - }, - { - "epoch": 0.33204264573277464, - "grad_norm": 5.063669681549072, - "learning_rate": 4.446595590445376e-05, - "loss": 0.6292, - "step": 37560 - }, - { - "epoch": 0.3321310489930869, - "grad_norm": 3.942572593688965, - "learning_rate": 4.446448251678189e-05, - "loss": 0.7594, - "step": 37570 - }, - { - "epoch": 0.3322194522533991, - "grad_norm": 2.7459118366241455, - "learning_rate": 4.4463009129110016e-05, - "loss": 0.7612, - "step": 37580 - }, - { - "epoch": 0.33230785551371134, - "grad_norm": 3.110966682434082, - "learning_rate": 4.446153574143815e-05, - "loss": 0.7944, - "step": 37590 - }, - { - "epoch": 0.33239625877402357, - "grad_norm": 2.620520830154419, - "learning_rate": 4.446006235376627e-05, - "loss": 0.7875, - "step": 37600 - }, - { - "epoch": 0.33248466203433585, - "grad_norm": 3.686331033706665, - "learning_rate": 4.445858896609441e-05, - "loss": 0.7606, - "step": 37610 - }, - { - "epoch": 0.3325730652946481, - "grad_norm": 9.094108581542969, - "learning_rate": 4.4457115578422536e-05, - "loss": 0.6452, - "step": 37620 - }, - { - "epoch": 0.3326614685549603, - "grad_norm": 3.4581174850463867, - "learning_rate": 4.4455642190750664e-05, - "loss": 0.7565, - "step": 37630 - }, - { - "epoch": 0.33274987181527255, - "grad_norm": 7.295807838439941, - "learning_rate": 4.445416880307879e-05, - "loss": 0.6143, - "step": 37640 - }, - { - "epoch": 0.3328382750755848, - "grad_norm": 7.385070323944092, - "learning_rate": 4.445269541540692e-05, - "loss": 0.8879, - "step": 37650 - }, - { - "epoch": 0.332926678335897, - "grad_norm": 4.298337459564209, - "learning_rate": 4.445122202773505e-05, - "loss": 0.6443, - "step": 37660 - }, - { - "epoch": 0.3330150815962093, - "grad_norm": 4.23216438293457, - "learning_rate": 4.4449748640063185e-05, - "loss": 0.6731, - "step": 37670 - }, - { - "epoch": 0.3331034848565215, - "grad_norm": 3.5965120792388916, - "learning_rate": 4.4448275252391306e-05, - "loss": 0.7848, - "step": 37680 - }, - { - "epoch": 0.33319188811683376, - "grad_norm": 6.041168212890625, - "learning_rate": 4.444680186471944e-05, - "loss": 0.8365, - "step": 37690 - }, - { - "epoch": 0.333280291377146, - "grad_norm": 2.818695306777954, - "learning_rate": 4.444532847704757e-05, - "loss": 0.7816, - "step": 37700 - }, - { - "epoch": 0.3333686946374582, - "grad_norm": 1.4142529964447021, - "learning_rate": 4.44438550893757e-05, - "loss": 0.7574, - "step": 37710 - }, - { - "epoch": 0.33345709789777045, - "grad_norm": 5.777283668518066, - "learning_rate": 4.4442381701703826e-05, - "loss": 0.7448, - "step": 37720 - }, - { - "epoch": 0.33354550115808274, - "grad_norm": 6.268942832946777, - "learning_rate": 4.444090831403196e-05, - "loss": 0.7011, - "step": 37730 - }, - { - "epoch": 0.33363390441839497, - "grad_norm": 3.510544776916504, - "learning_rate": 4.443943492636008e-05, - "loss": 0.7341, - "step": 37740 - }, - { - "epoch": 0.3337223076787072, - "grad_norm": 7.378816604614258, - "learning_rate": 4.443796153868822e-05, - "loss": 0.8453, - "step": 37750 - }, - { - "epoch": 0.33381071093901943, - "grad_norm": 3.546304941177368, - "learning_rate": 4.443648815101634e-05, - "loss": 0.8961, - "step": 37760 - }, - { - "epoch": 0.33389911419933166, - "grad_norm": 5.9796905517578125, - "learning_rate": 4.4435014763344475e-05, - "loss": 0.8286, - "step": 37770 - }, - { - "epoch": 0.3339875174596439, - "grad_norm": 3.9999821186065674, - "learning_rate": 4.44335413756726e-05, - "loss": 0.7225, - "step": 37780 - }, - { - "epoch": 0.3340759207199562, - "grad_norm": 5.240865230560303, - "learning_rate": 4.443206798800073e-05, - "loss": 0.7175, - "step": 37790 - }, - { - "epoch": 0.3341643239802684, - "grad_norm": 6.773401737213135, - "learning_rate": 4.443059460032886e-05, - "loss": 0.6985, - "step": 37800 - }, - { - "epoch": 0.33425272724058064, - "grad_norm": 1.9193450212478638, - "learning_rate": 4.4429121212656995e-05, - "loss": 0.7263, - "step": 37810 - }, - { - "epoch": 0.33434113050089287, - "grad_norm": 5.275112628936768, - "learning_rate": 4.4427647824985117e-05, - "loss": 0.7313, - "step": 37820 - }, - { - "epoch": 0.3344295337612051, - "grad_norm": 2.2873361110687256, - "learning_rate": 4.442617443731325e-05, - "loss": 0.7307, - "step": 37830 - }, - { - "epoch": 0.33451793702151733, - "grad_norm": 3.0789976119995117, - "learning_rate": 4.442470104964138e-05, - "loss": 0.7869, - "step": 37840 - }, - { - "epoch": 0.3346063402818296, - "grad_norm": 5.9778361320495605, - "learning_rate": 4.442322766196951e-05, - "loss": 0.7762, - "step": 37850 - }, - { - "epoch": 0.33469474354214185, - "grad_norm": 8.424156188964844, - "learning_rate": 4.442175427429764e-05, - "loss": 0.7244, - "step": 37860 - }, - { - "epoch": 0.3347831468024541, - "grad_norm": 3.348323106765747, - "learning_rate": 4.442028088662577e-05, - "loss": 0.7951, - "step": 37870 - }, - { - "epoch": 0.3348715500627663, - "grad_norm": 6.860462188720703, - "learning_rate": 4.4418807498953893e-05, - "loss": 0.6985, - "step": 37880 - }, - { - "epoch": 0.33495995332307854, - "grad_norm": 20.336254119873047, - "learning_rate": 4.441733411128203e-05, - "loss": 0.8301, - "step": 37890 - }, - { - "epoch": 0.33504835658339077, - "grad_norm": 2.144000768661499, - "learning_rate": 4.441586072361015e-05, - "loss": 0.6725, - "step": 37900 - }, - { - "epoch": 0.33513675984370306, - "grad_norm": 1.9997081756591797, - "learning_rate": 4.4414387335938285e-05, - "loss": 0.7408, - "step": 37910 - }, - { - "epoch": 0.3352251631040153, - "grad_norm": 7.838317394256592, - "learning_rate": 4.4412913948266414e-05, - "loss": 0.8178, - "step": 37920 - }, - { - "epoch": 0.3353135663643275, - "grad_norm": 2.0753121376037598, - "learning_rate": 4.441144056059454e-05, - "loss": 0.7305, - "step": 37930 - }, - { - "epoch": 0.33540196962463975, - "grad_norm": 2.404961585998535, - "learning_rate": 4.440996717292267e-05, - "loss": 0.7607, - "step": 37940 - }, - { - "epoch": 0.335490372884952, - "grad_norm": 2.8562088012695312, - "learning_rate": 4.4408493785250806e-05, - "loss": 0.6851, - "step": 37950 - }, - { - "epoch": 0.3355787761452642, - "grad_norm": 2.9436092376708984, - "learning_rate": 4.440702039757893e-05, - "loss": 0.7258, - "step": 37960 - }, - { - "epoch": 0.3356671794055765, - "grad_norm": 2.2134668827056885, - "learning_rate": 4.440554700990706e-05, - "loss": 0.7526, - "step": 37970 - }, - { - "epoch": 0.33575558266588873, - "grad_norm": 37.28148651123047, - "learning_rate": 4.440407362223519e-05, - "loss": 0.7394, - "step": 37980 - }, - { - "epoch": 0.33584398592620096, - "grad_norm": 3.944171190261841, - "learning_rate": 4.440260023456332e-05, - "loss": 0.7511, - "step": 37990 - }, - { - "epoch": 0.3359323891865132, - "grad_norm": 1.5751720666885376, - "learning_rate": 4.440112684689145e-05, - "loss": 0.83, - "step": 38000 - }, - { - "epoch": 0.3360207924468254, - "grad_norm": 3.7713801860809326, - "learning_rate": 4.4399653459219576e-05, - "loss": 0.8186, - "step": 38010 - }, - { - "epoch": 0.33610919570713765, - "grad_norm": 3.6327595710754395, - "learning_rate": 4.4398180071547704e-05, - "loss": 0.6205, - "step": 38020 - }, - { - "epoch": 0.33619759896744994, - "grad_norm": 8.001874923706055, - "learning_rate": 4.439670668387584e-05, - "loss": 0.695, - "step": 38030 - }, - { - "epoch": 0.33628600222776217, - "grad_norm": 2.667207956314087, - "learning_rate": 4.439523329620397e-05, - "loss": 0.8388, - "step": 38040 - }, - { - "epoch": 0.3363744054880744, - "grad_norm": 2.9962949752807617, - "learning_rate": 4.4393759908532096e-05, - "loss": 0.6691, - "step": 38050 - }, - { - "epoch": 0.33646280874838663, - "grad_norm": 4.5162529945373535, - "learning_rate": 4.4392286520860224e-05, - "loss": 0.7725, - "step": 38060 - }, - { - "epoch": 0.33655121200869886, - "grad_norm": 11.078516960144043, - "learning_rate": 4.439081313318835e-05, - "loss": 0.7412, - "step": 38070 - }, - { - "epoch": 0.3366396152690111, - "grad_norm": 11.169146537780762, - "learning_rate": 4.438933974551648e-05, - "loss": 0.7767, - "step": 38080 - }, - { - "epoch": 0.3367280185293234, - "grad_norm": 3.0054001808166504, - "learning_rate": 4.4387866357844616e-05, - "loss": 0.9013, - "step": 38090 - }, - { - "epoch": 0.3368164217896356, - "grad_norm": 2.3745791912078857, - "learning_rate": 4.4386392970172744e-05, - "loss": 0.8172, - "step": 38100 - }, - { - "epoch": 0.33690482504994784, - "grad_norm": 4.295470714569092, - "learning_rate": 4.438491958250087e-05, - "loss": 0.7548, - "step": 38110 - }, - { - "epoch": 0.3369932283102601, - "grad_norm": 5.31907320022583, - "learning_rate": 4.4383446194829e-05, - "loss": 0.7774, - "step": 38120 - }, - { - "epoch": 0.3370816315705723, - "grad_norm": 2.9692611694335938, - "learning_rate": 4.438197280715713e-05, - "loss": 0.6463, - "step": 38130 - }, - { - "epoch": 0.3371700348308846, - "grad_norm": 2.7916946411132812, - "learning_rate": 4.438049941948526e-05, - "loss": 0.7102, - "step": 38140 - }, - { - "epoch": 0.3372584380911968, - "grad_norm": 2.026956081390381, - "learning_rate": 4.4379026031813386e-05, - "loss": 0.6695, - "step": 38150 - }, - { - "epoch": 0.33734684135150905, - "grad_norm": 4.379522323608398, - "learning_rate": 4.437755264414152e-05, - "loss": 0.8069, - "step": 38160 - }, - { - "epoch": 0.3374352446118213, - "grad_norm": 2.1830618381500244, - "learning_rate": 4.437607925646965e-05, - "loss": 0.6677, - "step": 38170 - }, - { - "epoch": 0.3375236478721335, - "grad_norm": 3.632488965988159, - "learning_rate": 4.437460586879778e-05, - "loss": 0.8248, - "step": 38180 - }, - { - "epoch": 0.33761205113244575, - "grad_norm": 2.902557849884033, - "learning_rate": 4.4373132481125906e-05, - "loss": 0.7919, - "step": 38190 - }, - { - "epoch": 0.33770045439275803, - "grad_norm": 6.855132102966309, - "learning_rate": 4.4371659093454035e-05, - "loss": 0.608, - "step": 38200 - }, - { - "epoch": 0.33778885765307026, - "grad_norm": 20.101909637451172, - "learning_rate": 4.437018570578216e-05, - "loss": 0.806, - "step": 38210 - }, - { - "epoch": 0.3378772609133825, - "grad_norm": 4.567832946777344, - "learning_rate": 4.43687123181103e-05, - "loss": 0.7061, - "step": 38220 - }, - { - "epoch": 0.3379656641736947, - "grad_norm": 2.52120304107666, - "learning_rate": 4.436723893043842e-05, - "loss": 0.6626, - "step": 38230 - }, - { - "epoch": 0.33805406743400696, - "grad_norm": 7.894671440124512, - "learning_rate": 4.4365765542766555e-05, - "loss": 0.7207, - "step": 38240 - }, - { - "epoch": 0.3381424706943192, - "grad_norm": 2.661726951599121, - "learning_rate": 4.436429215509468e-05, - "loss": 0.6896, - "step": 38250 - }, - { - "epoch": 0.3382308739546315, - "grad_norm": 8.05888557434082, - "learning_rate": 4.436281876742281e-05, - "loss": 0.8958, - "step": 38260 - }, - { - "epoch": 0.3383192772149437, - "grad_norm": 5.401699542999268, - "learning_rate": 4.436134537975094e-05, - "loss": 0.8154, - "step": 38270 - }, - { - "epoch": 0.33840768047525593, - "grad_norm": 4.924627780914307, - "learning_rate": 4.4359871992079075e-05, - "loss": 0.8478, - "step": 38280 - }, - { - "epoch": 0.33849608373556817, - "grad_norm": 2.4446215629577637, - "learning_rate": 4.43583986044072e-05, - "loss": 0.7358, - "step": 38290 - }, - { - "epoch": 0.3385844869958804, - "grad_norm": 4.387701988220215, - "learning_rate": 4.435692521673533e-05, - "loss": 0.7378, - "step": 38300 - }, - { - "epoch": 0.3386728902561926, - "grad_norm": 4.186991214752197, - "learning_rate": 4.435545182906346e-05, - "loss": 0.7074, - "step": 38310 - }, - { - "epoch": 0.3387612935165049, - "grad_norm": 2.201347589492798, - "learning_rate": 4.435397844139159e-05, - "loss": 0.8112, - "step": 38320 - }, - { - "epoch": 0.33884969677681714, - "grad_norm": 8.414388656616211, - "learning_rate": 4.435250505371972e-05, - "loss": 0.6043, - "step": 38330 - }, - { - "epoch": 0.3389381000371294, - "grad_norm": 2.9845588207244873, - "learning_rate": 4.435103166604785e-05, - "loss": 0.5832, - "step": 38340 - }, - { - "epoch": 0.3390265032974416, - "grad_norm": 3.291576623916626, - "learning_rate": 4.4349558278375974e-05, - "loss": 0.7391, - "step": 38350 - }, - { - "epoch": 0.33911490655775384, - "grad_norm": 7.75879430770874, - "learning_rate": 4.434808489070411e-05, - "loss": 0.6709, - "step": 38360 - }, - { - "epoch": 0.33920330981806607, - "grad_norm": 3.6497490406036377, - "learning_rate": 4.434661150303223e-05, - "loss": 0.8984, - "step": 38370 - }, - { - "epoch": 0.33929171307837835, - "grad_norm": 3.4737775325775146, - "learning_rate": 4.4345138115360365e-05, - "loss": 0.6528, - "step": 38380 - }, - { - "epoch": 0.3393801163386906, - "grad_norm": 8.572341918945312, - "learning_rate": 4.4343664727688494e-05, - "loss": 0.6855, - "step": 38390 - }, - { - "epoch": 0.3394685195990028, - "grad_norm": 7.881353855133057, - "learning_rate": 4.434219134001662e-05, - "loss": 0.7787, - "step": 38400 - }, - { - "epoch": 0.33955692285931505, - "grad_norm": 1.3303899765014648, - "learning_rate": 4.434071795234475e-05, - "loss": 0.5963, - "step": 38410 - }, - { - "epoch": 0.3396453261196273, - "grad_norm": 4.56694221496582, - "learning_rate": 4.4339244564672886e-05, - "loss": 0.9442, - "step": 38420 - }, - { - "epoch": 0.3397337293799395, - "grad_norm": 3.620880365371704, - "learning_rate": 4.433777117700101e-05, - "loss": 0.7801, - "step": 38430 - }, - { - "epoch": 0.3398221326402518, - "grad_norm": 2.286504030227661, - "learning_rate": 4.433629778932914e-05, - "loss": 0.909, - "step": 38440 - }, - { - "epoch": 0.339910535900564, - "grad_norm": 3.468268871307373, - "learning_rate": 4.433482440165727e-05, - "loss": 0.6692, - "step": 38450 - }, - { - "epoch": 0.33999893916087626, - "grad_norm": 7.405500888824463, - "learning_rate": 4.43333510139854e-05, - "loss": 0.6987, - "step": 38460 - }, - { - "epoch": 0.3400873424211885, - "grad_norm": 5.755014419555664, - "learning_rate": 4.433187762631353e-05, - "loss": 0.6701, - "step": 38470 - }, - { - "epoch": 0.3401757456815007, - "grad_norm": 3.834282875061035, - "learning_rate": 4.4330404238641656e-05, - "loss": 0.8425, - "step": 38480 - }, - { - "epoch": 0.34026414894181295, - "grad_norm": 3.051474094390869, - "learning_rate": 4.4328930850969784e-05, - "loss": 0.6757, - "step": 38490 - }, - { - "epoch": 0.34035255220212524, - "grad_norm": 2.1180267333984375, - "learning_rate": 4.432745746329792e-05, - "loss": 0.7447, - "step": 38500 - }, - { - "epoch": 0.34044095546243747, - "grad_norm": 11.79562759399414, - "learning_rate": 4.432598407562604e-05, - "loss": 0.7153, - "step": 38510 - }, - { - "epoch": 0.3405293587227497, - "grad_norm": 4.963253974914551, - "learning_rate": 4.4324510687954176e-05, - "loss": 0.6423, - "step": 38520 - }, - { - "epoch": 0.34061776198306193, - "grad_norm": 3.5599465370178223, - "learning_rate": 4.4323037300282304e-05, - "loss": 0.6866, - "step": 38530 - }, - { - "epoch": 0.34070616524337416, - "grad_norm": 4.488028049468994, - "learning_rate": 4.432156391261043e-05, - "loss": 0.7868, - "step": 38540 - }, - { - "epoch": 0.3407945685036864, - "grad_norm": 2.4642605781555176, - "learning_rate": 4.432009052493856e-05, - "loss": 0.8101, - "step": 38550 - }, - { - "epoch": 0.3408829717639987, - "grad_norm": 4.765092849731445, - "learning_rate": 4.4318617137266696e-05, - "loss": 0.8099, - "step": 38560 - }, - { - "epoch": 0.3409713750243109, - "grad_norm": 2.6710798740386963, - "learning_rate": 4.431714374959482e-05, - "loss": 0.8209, - "step": 38570 - }, - { - "epoch": 0.34105977828462314, - "grad_norm": 2.7096149921417236, - "learning_rate": 4.431567036192295e-05, - "loss": 0.6875, - "step": 38580 - }, - { - "epoch": 0.34114818154493537, - "grad_norm": 1.7125182151794434, - "learning_rate": 4.4314196974251074e-05, - "loss": 0.7881, - "step": 38590 - }, - { - "epoch": 0.3412365848052476, - "grad_norm": 2.053903818130493, - "learning_rate": 4.431272358657921e-05, - "loss": 0.7455, - "step": 38600 - }, - { - "epoch": 0.34132498806555983, - "grad_norm": 5.243853569030762, - "learning_rate": 4.431125019890734e-05, - "loss": 0.6705, - "step": 38610 - }, - { - "epoch": 0.3414133913258721, - "grad_norm": 4.675059795379639, - "learning_rate": 4.4309776811235466e-05, - "loss": 0.7284, - "step": 38620 - }, - { - "epoch": 0.34150179458618435, - "grad_norm": 0.9795480370521545, - "learning_rate": 4.4308303423563595e-05, - "loss": 0.7653, - "step": 38630 - }, - { - "epoch": 0.3415901978464966, - "grad_norm": 2.595299005508423, - "learning_rate": 4.430683003589173e-05, - "loss": 0.7195, - "step": 38640 - }, - { - "epoch": 0.3416786011068088, - "grad_norm": 8.659255981445312, - "learning_rate": 4.430535664821985e-05, - "loss": 0.7188, - "step": 38650 - }, - { - "epoch": 0.34176700436712104, - "grad_norm": 1.6105016469955444, - "learning_rate": 4.4303883260547986e-05, - "loss": 0.6767, - "step": 38660 - }, - { - "epoch": 0.34185540762743327, - "grad_norm": 3.3377585411071777, - "learning_rate": 4.4302409872876115e-05, - "loss": 0.7457, - "step": 38670 - }, - { - "epoch": 0.34194381088774556, - "grad_norm": 3.788832426071167, - "learning_rate": 4.430093648520424e-05, - "loss": 0.7619, - "step": 38680 - }, - { - "epoch": 0.3420322141480578, - "grad_norm": 6.489564418792725, - "learning_rate": 4.429946309753237e-05, - "loss": 0.7225, - "step": 38690 - }, - { - "epoch": 0.34212061740837, - "grad_norm": 1.7470455169677734, - "learning_rate": 4.42979897098605e-05, - "loss": 0.6937, - "step": 38700 - }, - { - "epoch": 0.34220902066868225, - "grad_norm": 6.949313163757324, - "learning_rate": 4.429651632218863e-05, - "loss": 0.6912, - "step": 38710 - }, - { - "epoch": 0.3422974239289945, - "grad_norm": 3.092290163040161, - "learning_rate": 4.429504293451676e-05, - "loss": 0.7623, - "step": 38720 - }, - { - "epoch": 0.34238582718930677, - "grad_norm": 8.560245513916016, - "learning_rate": 4.4293569546844885e-05, - "loss": 0.6472, - "step": 38730 - }, - { - "epoch": 0.342474230449619, - "grad_norm": 4.158474922180176, - "learning_rate": 4.429209615917302e-05, - "loss": 0.6863, - "step": 38740 - }, - { - "epoch": 0.34256263370993123, - "grad_norm": 5.6758503913879395, - "learning_rate": 4.429062277150115e-05, - "loss": 0.702, - "step": 38750 - }, - { - "epoch": 0.34265103697024346, - "grad_norm": 7.173279762268066, - "learning_rate": 4.428914938382928e-05, - "loss": 0.7719, - "step": 38760 - }, - { - "epoch": 0.3427394402305557, - "grad_norm": 3.5123791694641113, - "learning_rate": 4.4287675996157405e-05, - "loss": 0.7444, - "step": 38770 - }, - { - "epoch": 0.3428278434908679, - "grad_norm": 10.433554649353027, - "learning_rate": 4.428620260848554e-05, - "loss": 0.6987, - "step": 38780 - }, - { - "epoch": 0.3429162467511802, - "grad_norm": 3.3632681369781494, - "learning_rate": 4.428472922081366e-05, - "loss": 0.7488, - "step": 38790 - }, - { - "epoch": 0.34300465001149244, - "grad_norm": 2.0097849369049072, - "learning_rate": 4.42832558331418e-05, - "loss": 0.7543, - "step": 38800 - }, - { - "epoch": 0.34309305327180467, - "grad_norm": 6.305533409118652, - "learning_rate": 4.4281782445469925e-05, - "loss": 0.7586, - "step": 38810 - }, - { - "epoch": 0.3431814565321169, - "grad_norm": 4.433217525482178, - "learning_rate": 4.4280309057798054e-05, - "loss": 0.8186, - "step": 38820 - }, - { - "epoch": 0.34326985979242913, - "grad_norm": 1.886404037475586, - "learning_rate": 4.427883567012618e-05, - "loss": 0.7404, - "step": 38830 - }, - { - "epoch": 0.34335826305274136, - "grad_norm": 7.266593933105469, - "learning_rate": 4.427736228245431e-05, - "loss": 0.6237, - "step": 38840 - }, - { - "epoch": 0.34344666631305365, - "grad_norm": 2.7771661281585693, - "learning_rate": 4.427588889478244e-05, - "loss": 0.6898, - "step": 38850 - }, - { - "epoch": 0.3435350695733659, - "grad_norm": 2.3502469062805176, - "learning_rate": 4.4274415507110574e-05, - "loss": 0.7738, - "step": 38860 - }, - { - "epoch": 0.3436234728336781, - "grad_norm": 4.1199822425842285, - "learning_rate": 4.4272942119438695e-05, - "loss": 0.849, - "step": 38870 - }, - { - "epoch": 0.34371187609399034, - "grad_norm": 3.7873480319976807, - "learning_rate": 4.427146873176683e-05, - "loss": 0.6122, - "step": 38880 - }, - { - "epoch": 0.3438002793543026, - "grad_norm": 9.393013000488281, - "learning_rate": 4.426999534409496e-05, - "loss": 0.7628, - "step": 38890 - }, - { - "epoch": 0.3438886826146148, - "grad_norm": 3.412522077560425, - "learning_rate": 4.426852195642309e-05, - "loss": 0.6627, - "step": 38900 - }, - { - "epoch": 0.3439770858749271, - "grad_norm": 4.772474765777588, - "learning_rate": 4.4267048568751216e-05, - "loss": 0.8214, - "step": 38910 - }, - { - "epoch": 0.3440654891352393, - "grad_norm": 4.12615442276001, - "learning_rate": 4.426557518107935e-05, - "loss": 0.6827, - "step": 38920 - }, - { - "epoch": 0.34415389239555155, - "grad_norm": 3.1334691047668457, - "learning_rate": 4.426410179340747e-05, - "loss": 0.6754, - "step": 38930 - }, - { - "epoch": 0.3442422956558638, - "grad_norm": 2.491276979446411, - "learning_rate": 4.426262840573561e-05, - "loss": 0.7243, - "step": 38940 - }, - { - "epoch": 0.344330698916176, - "grad_norm": 1.7073016166687012, - "learning_rate": 4.4261155018063736e-05, - "loss": 0.7204, - "step": 38950 - }, - { - "epoch": 0.34441910217648825, - "grad_norm": 6.672577381134033, - "learning_rate": 4.4259681630391864e-05, - "loss": 0.6357, - "step": 38960 - }, - { - "epoch": 0.34450750543680053, - "grad_norm": 3.4583609104156494, - "learning_rate": 4.425820824271999e-05, - "loss": 0.7868, - "step": 38970 - }, - { - "epoch": 0.34459590869711276, - "grad_norm": 2.7108612060546875, - "learning_rate": 4.425673485504812e-05, - "loss": 0.7452, - "step": 38980 - }, - { - "epoch": 0.344684311957425, - "grad_norm": 3.729666233062744, - "learning_rate": 4.425526146737625e-05, - "loss": 0.796, - "step": 38990 - }, - { - "epoch": 0.3447727152177372, - "grad_norm": 2.58547306060791, - "learning_rate": 4.4253788079704384e-05, - "loss": 0.7642, - "step": 39000 - }, - { - "epoch": 0.34486111847804946, - "grad_norm": 1.6560652256011963, - "learning_rate": 4.425231469203251e-05, - "loss": 0.8992, - "step": 39010 - }, - { - "epoch": 0.3449495217383617, - "grad_norm": 2.2690773010253906, - "learning_rate": 4.425084130436064e-05, - "loss": 0.662, - "step": 39020 - }, - { - "epoch": 0.345037924998674, - "grad_norm": 8.749836921691895, - "learning_rate": 4.424936791668877e-05, - "loss": 0.7323, - "step": 39030 - }, - { - "epoch": 0.3451263282589862, - "grad_norm": 1.8457791805267334, - "learning_rate": 4.42478945290169e-05, - "loss": 0.7426, - "step": 39040 - }, - { - "epoch": 0.34521473151929843, - "grad_norm": 2.8448383808135986, - "learning_rate": 4.4246421141345026e-05, - "loss": 0.7657, - "step": 39050 - }, - { - "epoch": 0.34530313477961067, - "grad_norm": 2.9351272583007812, - "learning_rate": 4.4244947753673154e-05, - "loss": 0.7119, - "step": 39060 - }, - { - "epoch": 0.3453915380399229, - "grad_norm": 6.2362961769104, - "learning_rate": 4.424347436600129e-05, - "loss": 0.8469, - "step": 39070 - }, - { - "epoch": 0.3454799413002351, - "grad_norm": 6.559598922729492, - "learning_rate": 4.424200097832942e-05, - "loss": 0.6604, - "step": 39080 - }, - { - "epoch": 0.3455683445605474, - "grad_norm": 6.368211269378662, - "learning_rate": 4.4240527590657546e-05, - "loss": 0.5775, - "step": 39090 - }, - { - "epoch": 0.34565674782085964, - "grad_norm": 1.8094475269317627, - "learning_rate": 4.4239054202985675e-05, - "loss": 0.7922, - "step": 39100 - }, - { - "epoch": 0.3457451510811719, - "grad_norm": 9.018067359924316, - "learning_rate": 4.42375808153138e-05, - "loss": 0.8196, - "step": 39110 - }, - { - "epoch": 0.3458335543414841, - "grad_norm": 4.900691032409668, - "learning_rate": 4.423610742764193e-05, - "loss": 0.6535, - "step": 39120 - }, - { - "epoch": 0.34592195760179634, - "grad_norm": 8.475072860717773, - "learning_rate": 4.4234634039970066e-05, - "loss": 0.7438, - "step": 39130 - }, - { - "epoch": 0.34601036086210857, - "grad_norm": 2.53385853767395, - "learning_rate": 4.4233160652298195e-05, - "loss": 0.626, - "step": 39140 - }, - { - "epoch": 0.34609876412242085, - "grad_norm": 7.454282760620117, - "learning_rate": 4.423168726462632e-05, - "loss": 0.6581, - "step": 39150 - }, - { - "epoch": 0.3461871673827331, - "grad_norm": 2.232750415802002, - "learning_rate": 4.423021387695445e-05, - "loss": 0.6313, - "step": 39160 - }, - { - "epoch": 0.3462755706430453, - "grad_norm": 10.551502227783203, - "learning_rate": 4.422874048928258e-05, - "loss": 0.666, - "step": 39170 - }, - { - "epoch": 0.34636397390335755, - "grad_norm": 2.270404577255249, - "learning_rate": 4.422726710161071e-05, - "loss": 0.6959, - "step": 39180 - }, - { - "epoch": 0.3464523771636698, - "grad_norm": 6.183990955352783, - "learning_rate": 4.422579371393884e-05, - "loss": 0.6748, - "step": 39190 - }, - { - "epoch": 0.346540780423982, - "grad_norm": 2.9584808349609375, - "learning_rate": 4.4224320326266965e-05, - "loss": 0.6835, - "step": 39200 - }, - { - "epoch": 0.3466291836842943, - "grad_norm": 7.449177265167236, - "learning_rate": 4.42228469385951e-05, - "loss": 0.7968, - "step": 39210 - }, - { - "epoch": 0.3467175869446065, - "grad_norm": 4.392550945281982, - "learning_rate": 4.422137355092323e-05, - "loss": 0.7211, - "step": 39220 - }, - { - "epoch": 0.34680599020491876, - "grad_norm": 7.9824066162109375, - "learning_rate": 4.421990016325136e-05, - "loss": 0.6396, - "step": 39230 - }, - { - "epoch": 0.346894393465231, - "grad_norm": 8.24870777130127, - "learning_rate": 4.4218426775579485e-05, - "loss": 0.5851, - "step": 39240 - }, - { - "epoch": 0.3469827967255432, - "grad_norm": 5.892064571380615, - "learning_rate": 4.421695338790762e-05, - "loss": 0.637, - "step": 39250 - }, - { - "epoch": 0.3470711999858555, - "grad_norm": 1.8518664836883545, - "learning_rate": 4.421548000023574e-05, - "loss": 0.6943, - "step": 39260 - }, - { - "epoch": 0.34715960324616774, - "grad_norm": 2.4457929134368896, - "learning_rate": 4.421400661256388e-05, - "loss": 0.7044, - "step": 39270 - }, - { - "epoch": 0.34724800650647997, - "grad_norm": 6.163054943084717, - "learning_rate": 4.4212533224892005e-05, - "loss": 0.7894, - "step": 39280 - }, - { - "epoch": 0.3473364097667922, - "grad_norm": 3.261793613433838, - "learning_rate": 4.4211059837220134e-05, - "loss": 0.6195, - "step": 39290 - }, - { - "epoch": 0.34742481302710443, - "grad_norm": 3.6927614212036133, - "learning_rate": 4.420958644954826e-05, - "loss": 0.7456, - "step": 39300 - }, - { - "epoch": 0.34751321628741666, - "grad_norm": 3.9024741649627686, - "learning_rate": 4.420811306187639e-05, - "loss": 0.8354, - "step": 39310 - }, - { - "epoch": 0.34760161954772895, - "grad_norm": 6.899050235748291, - "learning_rate": 4.420663967420452e-05, - "loss": 0.6942, - "step": 39320 - }, - { - "epoch": 0.3476900228080412, - "grad_norm": 2.2269694805145264, - "learning_rate": 4.4205166286532654e-05, - "loss": 0.7314, - "step": 39330 - }, - { - "epoch": 0.3477784260683534, - "grad_norm": 4.20051908493042, - "learning_rate": 4.4203692898860775e-05, - "loss": 0.7062, - "step": 39340 - }, - { - "epoch": 0.34786682932866564, - "grad_norm": 4.613553047180176, - "learning_rate": 4.420221951118891e-05, - "loss": 0.8253, - "step": 39350 - }, - { - "epoch": 0.34795523258897787, - "grad_norm": 13.918585777282715, - "learning_rate": 4.420074612351704e-05, - "loss": 0.7943, - "step": 39360 - }, - { - "epoch": 0.3480436358492901, - "grad_norm": 3.904947280883789, - "learning_rate": 4.419927273584517e-05, - "loss": 0.8599, - "step": 39370 - }, - { - "epoch": 0.3481320391096024, - "grad_norm": 7.048567771911621, - "learning_rate": 4.4197799348173296e-05, - "loss": 0.6769, - "step": 39380 - }, - { - "epoch": 0.3482204423699146, - "grad_norm": 2.5431551933288574, - "learning_rate": 4.419632596050143e-05, - "loss": 0.6828, - "step": 39390 - }, - { - "epoch": 0.34830884563022685, - "grad_norm": 1.659877061843872, - "learning_rate": 4.419485257282955e-05, - "loss": 0.7342, - "step": 39400 - }, - { - "epoch": 0.3483972488905391, - "grad_norm": 2.5533907413482666, - "learning_rate": 4.419337918515769e-05, - "loss": 0.7747, - "step": 39410 - }, - { - "epoch": 0.3484856521508513, - "grad_norm": 4.082030296325684, - "learning_rate": 4.419190579748581e-05, - "loss": 0.8179, - "step": 39420 - }, - { - "epoch": 0.34857405541116354, - "grad_norm": 6.7454118728637695, - "learning_rate": 4.4190432409813944e-05, - "loss": 0.7295, - "step": 39430 - }, - { - "epoch": 0.34866245867147583, - "grad_norm": 11.954776763916016, - "learning_rate": 4.418895902214207e-05, - "loss": 0.7646, - "step": 39440 - }, - { - "epoch": 0.34875086193178806, - "grad_norm": 14.232224464416504, - "learning_rate": 4.41874856344702e-05, - "loss": 0.6452, - "step": 39450 - }, - { - "epoch": 0.3488392651921003, - "grad_norm": 2.053473711013794, - "learning_rate": 4.418601224679833e-05, - "loss": 0.7143, - "step": 39460 - }, - { - "epoch": 0.3489276684524125, - "grad_norm": 1.4256279468536377, - "learning_rate": 4.4184538859126464e-05, - "loss": 0.7698, - "step": 39470 - }, - { - "epoch": 0.34901607171272475, - "grad_norm": 19.723196029663086, - "learning_rate": 4.4183065471454586e-05, - "loss": 0.7785, - "step": 39480 - }, - { - "epoch": 0.349104474973037, - "grad_norm": 2.6459712982177734, - "learning_rate": 4.418159208378272e-05, - "loss": 0.6502, - "step": 39490 - }, - { - "epoch": 0.34919287823334927, - "grad_norm": 2.494441032409668, - "learning_rate": 4.418011869611085e-05, - "loss": 0.7649, - "step": 39500 - }, - { - "epoch": 0.3492812814936615, - "grad_norm": 3.158606767654419, - "learning_rate": 4.417864530843898e-05, - "loss": 0.7775, - "step": 39510 - }, - { - "epoch": 0.34936968475397373, - "grad_norm": 12.12488079071045, - "learning_rate": 4.4177171920767106e-05, - "loss": 0.7198, - "step": 39520 - }, - { - "epoch": 0.34945808801428596, - "grad_norm": 2.545377254486084, - "learning_rate": 4.4175698533095235e-05, - "loss": 0.7555, - "step": 39530 - }, - { - "epoch": 0.3495464912745982, - "grad_norm": 2.610600233078003, - "learning_rate": 4.417422514542336e-05, - "loss": 0.6876, - "step": 39540 - }, - { - "epoch": 0.3496348945349104, - "grad_norm": 4.401505470275879, - "learning_rate": 4.41727517577515e-05, - "loss": 0.6802, - "step": 39550 - }, - { - "epoch": 0.3497232977952227, - "grad_norm": 13.02286434173584, - "learning_rate": 4.417127837007962e-05, - "loss": 0.7054, - "step": 39560 - }, - { - "epoch": 0.34981170105553494, - "grad_norm": 5.426609039306641, - "learning_rate": 4.4169804982407755e-05, - "loss": 0.7053, - "step": 39570 - }, - { - "epoch": 0.34990010431584717, - "grad_norm": 3.6680009365081787, - "learning_rate": 4.416833159473588e-05, - "loss": 0.6769, - "step": 39580 - }, - { - "epoch": 0.3499885075761594, - "grad_norm": 6.889299392700195, - "learning_rate": 4.416685820706401e-05, - "loss": 0.8272, - "step": 39590 - }, - { - "epoch": 0.35007691083647163, - "grad_norm": 2.7408905029296875, - "learning_rate": 4.416538481939214e-05, - "loss": 0.7558, - "step": 39600 - }, - { - "epoch": 0.35016531409678386, - "grad_norm": 6.113536357879639, - "learning_rate": 4.4163911431720275e-05, - "loss": 0.7436, - "step": 39610 - }, - { - "epoch": 0.35025371735709615, - "grad_norm": 9.523975372314453, - "learning_rate": 4.4162438044048396e-05, - "loss": 0.648, - "step": 39620 - }, - { - "epoch": 0.3503421206174084, - "grad_norm": 2.6401007175445557, - "learning_rate": 4.416096465637653e-05, - "loss": 0.8088, - "step": 39630 - }, - { - "epoch": 0.3504305238777206, - "grad_norm": 5.548010349273682, - "learning_rate": 4.415949126870466e-05, - "loss": 0.6727, - "step": 39640 - }, - { - "epoch": 0.35051892713803284, - "grad_norm": 5.084409713745117, - "learning_rate": 4.415801788103279e-05, - "loss": 0.7815, - "step": 39650 - }, - { - "epoch": 0.3506073303983451, - "grad_norm": 6.08608865737915, - "learning_rate": 4.415654449336092e-05, - "loss": 0.7262, - "step": 39660 - }, - { - "epoch": 0.3506957336586573, - "grad_norm": 4.022087097167969, - "learning_rate": 4.4155071105689045e-05, - "loss": 0.7984, - "step": 39670 - }, - { - "epoch": 0.3507841369189696, - "grad_norm": 5.5862531661987305, - "learning_rate": 4.415359771801717e-05, - "loss": 0.9135, - "step": 39680 - }, - { - "epoch": 0.3508725401792818, - "grad_norm": 5.848991870880127, - "learning_rate": 4.415212433034531e-05, - "loss": 0.6956, - "step": 39690 - }, - { - "epoch": 0.35096094343959405, - "grad_norm": 6.420487880706787, - "learning_rate": 4.415065094267343e-05, - "loss": 0.761, - "step": 39700 - }, - { - "epoch": 0.3510493466999063, - "grad_norm": 6.251640319824219, - "learning_rate": 4.4149177555001565e-05, - "loss": 0.6705, - "step": 39710 - }, - { - "epoch": 0.3511377499602185, - "grad_norm": 9.221297264099121, - "learning_rate": 4.4147704167329694e-05, - "loss": 0.7528, - "step": 39720 - }, - { - "epoch": 0.35122615322053075, - "grad_norm": 1.613277554512024, - "learning_rate": 4.414623077965782e-05, - "loss": 0.7531, - "step": 39730 - }, - { - "epoch": 0.35131455648084303, - "grad_norm": 1.8966875076293945, - "learning_rate": 4.414475739198595e-05, - "loss": 0.7017, - "step": 39740 - }, - { - "epoch": 0.35140295974115526, - "grad_norm": 1.356676697731018, - "learning_rate": 4.4143284004314085e-05, - "loss": 0.9041, - "step": 39750 - }, - { - "epoch": 0.3514913630014675, - "grad_norm": 2.4049720764160156, - "learning_rate": 4.414181061664221e-05, - "loss": 0.8855, - "step": 39760 - }, - { - "epoch": 0.3515797662617797, - "grad_norm": 2.64721417427063, - "learning_rate": 4.414033722897034e-05, - "loss": 0.7483, - "step": 39770 - }, - { - "epoch": 0.35166816952209196, - "grad_norm": 2.6219327449798584, - "learning_rate": 4.4138863841298464e-05, - "loss": 0.7394, - "step": 39780 - }, - { - "epoch": 0.35175657278240424, - "grad_norm": 2.7238476276397705, - "learning_rate": 4.41373904536266e-05, - "loss": 0.6265, - "step": 39790 - }, - { - "epoch": 0.3518449760427165, - "grad_norm": 2.0804193019866943, - "learning_rate": 4.413591706595473e-05, - "loss": 0.5891, - "step": 39800 - }, - { - "epoch": 0.3519333793030287, - "grad_norm": 2.7444496154785156, - "learning_rate": 4.4134443678282856e-05, - "loss": 0.6599, - "step": 39810 - }, - { - "epoch": 0.35202178256334093, - "grad_norm": 5.589512348175049, - "learning_rate": 4.4132970290610984e-05, - "loss": 0.6934, - "step": 39820 - }, - { - "epoch": 0.35211018582365317, - "grad_norm": 5.298748970031738, - "learning_rate": 4.413149690293912e-05, - "loss": 0.9626, - "step": 39830 - }, - { - "epoch": 0.3521985890839654, - "grad_norm": 1.9821408987045288, - "learning_rate": 4.413002351526724e-05, - "loss": 0.7993, - "step": 39840 - }, - { - "epoch": 0.3522869923442777, - "grad_norm": 2.8862950801849365, - "learning_rate": 4.4128550127595376e-05, - "loss": 0.7875, - "step": 39850 - }, - { - "epoch": 0.3523753956045899, - "grad_norm": 5.353822231292725, - "learning_rate": 4.4127076739923504e-05, - "loss": 0.8324, - "step": 39860 - }, - { - "epoch": 0.35246379886490214, - "grad_norm": 6.620767593383789, - "learning_rate": 4.412560335225163e-05, - "loss": 0.72, - "step": 39870 - }, - { - "epoch": 0.3525522021252144, - "grad_norm": 10.123428344726562, - "learning_rate": 4.412412996457976e-05, - "loss": 0.6088, - "step": 39880 - }, - { - "epoch": 0.3526406053855266, - "grad_norm": 2.5653140544891357, - "learning_rate": 4.412265657690789e-05, - "loss": 0.7364, - "step": 39890 - }, - { - "epoch": 0.35272900864583884, - "grad_norm": 3.4880590438842773, - "learning_rate": 4.412118318923602e-05, - "loss": 0.612, - "step": 39900 - }, - { - "epoch": 0.3528174119061511, - "grad_norm": 4.04166841506958, - "learning_rate": 4.411970980156415e-05, - "loss": 0.6852, - "step": 39910 - }, - { - "epoch": 0.35290581516646335, - "grad_norm": 12.76233959197998, - "learning_rate": 4.411823641389228e-05, - "loss": 0.7562, - "step": 39920 - }, - { - "epoch": 0.3529942184267756, - "grad_norm": 2.44209361076355, - "learning_rate": 4.411676302622041e-05, - "loss": 0.6899, - "step": 39930 - }, - { - "epoch": 0.3530826216870878, - "grad_norm": 2.385620355606079, - "learning_rate": 4.411528963854854e-05, - "loss": 0.7664, - "step": 39940 - }, - { - "epoch": 0.35317102494740005, - "grad_norm": 6.197722911834717, - "learning_rate": 4.4113816250876666e-05, - "loss": 0.7016, - "step": 39950 - }, - { - "epoch": 0.3532594282077123, - "grad_norm": 2.3152050971984863, - "learning_rate": 4.4112342863204794e-05, - "loss": 0.7747, - "step": 39960 - }, - { - "epoch": 0.35334783146802456, - "grad_norm": 4.877121448516846, - "learning_rate": 4.411086947553293e-05, - "loss": 0.7875, - "step": 39970 - }, - { - "epoch": 0.3534362347283368, - "grad_norm": 2.164944648742676, - "learning_rate": 4.410939608786106e-05, - "loss": 0.6888, - "step": 39980 - }, - { - "epoch": 0.353524637988649, - "grad_norm": 1.504726767539978, - "learning_rate": 4.4107922700189186e-05, - "loss": 0.7891, - "step": 39990 - }, - { - "epoch": 0.35361304124896126, - "grad_norm": 4.400787353515625, - "learning_rate": 4.4106449312517315e-05, - "loss": 0.7454, - "step": 40000 - }, - { - "epoch": 0.3537014445092735, - "grad_norm": 2.773509979248047, - "learning_rate": 4.410497592484544e-05, - "loss": 0.6843, - "step": 40010 - }, - { - "epoch": 0.3537898477695857, - "grad_norm": 3.336489677429199, - "learning_rate": 4.410350253717357e-05, - "loss": 0.7951, - "step": 40020 - }, - { - "epoch": 0.353878251029898, - "grad_norm": 1.953701138496399, - "learning_rate": 4.41020291495017e-05, - "loss": 0.7838, - "step": 40030 - }, - { - "epoch": 0.35396665429021024, - "grad_norm": 7.4070143699646, - "learning_rate": 4.4100555761829835e-05, - "loss": 0.729, - "step": 40040 - }, - { - "epoch": 0.35405505755052247, - "grad_norm": 5.879505634307861, - "learning_rate": 4.409908237415796e-05, - "loss": 0.5443, - "step": 40050 - }, - { - "epoch": 0.3541434608108347, - "grad_norm": 1.577394962310791, - "learning_rate": 4.409760898648609e-05, - "loss": 0.7443, - "step": 40060 - }, - { - "epoch": 0.35423186407114693, - "grad_norm": 12.447354316711426, - "learning_rate": 4.409613559881422e-05, - "loss": 0.8277, - "step": 40070 - }, - { - "epoch": 0.35432026733145916, - "grad_norm": 1.5400261878967285, - "learning_rate": 4.409466221114235e-05, - "loss": 0.6151, - "step": 40080 - }, - { - "epoch": 0.35440867059177145, - "grad_norm": 7.868757247924805, - "learning_rate": 4.4093188823470477e-05, - "loss": 0.7102, - "step": 40090 - }, - { - "epoch": 0.3544970738520837, - "grad_norm": 2.3401095867156982, - "learning_rate": 4.409171543579861e-05, - "loss": 0.814, - "step": 40100 - }, - { - "epoch": 0.3545854771123959, - "grad_norm": 5.074752330780029, - "learning_rate": 4.409024204812674e-05, - "loss": 0.7593, - "step": 40110 - }, - { - "epoch": 0.35467388037270814, - "grad_norm": 12.343538284301758, - "learning_rate": 4.408876866045487e-05, - "loss": 0.6912, - "step": 40120 - }, - { - "epoch": 0.35476228363302037, - "grad_norm": 7.01021146774292, - "learning_rate": 4.4087295272783e-05, - "loss": 0.6184, - "step": 40130 - }, - { - "epoch": 0.3548506868933326, - "grad_norm": 5.7294721603393555, - "learning_rate": 4.4085821885111125e-05, - "loss": 0.6806, - "step": 40140 - }, - { - "epoch": 0.3549390901536449, - "grad_norm": 5.026396751403809, - "learning_rate": 4.4084348497439253e-05, - "loss": 0.614, - "step": 40150 - }, - { - "epoch": 0.3550274934139571, - "grad_norm": 1.325736403465271, - "learning_rate": 4.408287510976739e-05, - "loss": 0.6932, - "step": 40160 - }, - { - "epoch": 0.35511589667426935, - "grad_norm": 3.47292423248291, - "learning_rate": 4.408140172209551e-05, - "loss": 0.8155, - "step": 40170 - }, - { - "epoch": 0.3552042999345816, - "grad_norm": 5.075028896331787, - "learning_rate": 4.4079928334423645e-05, - "loss": 0.7405, - "step": 40180 - }, - { - "epoch": 0.3552927031948938, - "grad_norm": 6.216986656188965, - "learning_rate": 4.4078454946751774e-05, - "loss": 0.7883, - "step": 40190 - }, - { - "epoch": 0.35538110645520604, - "grad_norm": 3.336992025375366, - "learning_rate": 4.40769815590799e-05, - "loss": 0.744, - "step": 40200 - }, - { - "epoch": 0.35546950971551833, - "grad_norm": 1.6610621213912964, - "learning_rate": 4.407550817140803e-05, - "loss": 0.6141, - "step": 40210 - }, - { - "epoch": 0.35555791297583056, - "grad_norm": 4.76738166809082, - "learning_rate": 4.4074034783736165e-05, - "loss": 0.6706, - "step": 40220 - }, - { - "epoch": 0.3556463162361428, - "grad_norm": 9.403095245361328, - "learning_rate": 4.407256139606429e-05, - "loss": 0.6813, - "step": 40230 - }, - { - "epoch": 0.355734719496455, - "grad_norm": 7.407747268676758, - "learning_rate": 4.407108800839242e-05, - "loss": 0.6746, - "step": 40240 - }, - { - "epoch": 0.35582312275676725, - "grad_norm": 17.36669158935547, - "learning_rate": 4.4069614620720544e-05, - "loss": 0.6397, - "step": 40250 - }, - { - "epoch": 0.3559115260170795, - "grad_norm": 6.369690895080566, - "learning_rate": 4.406814123304868e-05, - "loss": 0.768, - "step": 40260 - }, - { - "epoch": 0.35599992927739177, - "grad_norm": 3.3993122577667236, - "learning_rate": 4.406666784537681e-05, - "loss": 0.7498, - "step": 40270 - }, - { - "epoch": 0.356088332537704, - "grad_norm": 2.4197874069213867, - "learning_rate": 4.4065194457704936e-05, - "loss": 0.8067, - "step": 40280 - }, - { - "epoch": 0.35617673579801623, - "grad_norm": 1.7032784223556519, - "learning_rate": 4.4063721070033064e-05, - "loss": 0.589, - "step": 40290 - }, - { - "epoch": 0.35626513905832846, - "grad_norm": 5.97502326965332, - "learning_rate": 4.40622476823612e-05, - "loss": 0.7253, - "step": 40300 - }, - { - "epoch": 0.3563535423186407, - "grad_norm": 3.3060402870178223, - "learning_rate": 4.406077429468932e-05, - "loss": 0.6148, - "step": 40310 - }, - { - "epoch": 0.356441945578953, - "grad_norm": 4.172302722930908, - "learning_rate": 4.4059300907017456e-05, - "loss": 0.6672, - "step": 40320 - }, - { - "epoch": 0.3565303488392652, - "grad_norm": 3.88161301612854, - "learning_rate": 4.4057827519345584e-05, - "loss": 0.6523, - "step": 40330 - }, - { - "epoch": 0.35661875209957744, - "grad_norm": 3.1442339420318604, - "learning_rate": 4.405635413167371e-05, - "loss": 0.6888, - "step": 40340 - }, - { - "epoch": 0.35670715535988967, - "grad_norm": 3.7767527103424072, - "learning_rate": 4.405488074400184e-05, - "loss": 0.7837, - "step": 40350 - }, - { - "epoch": 0.3567955586202019, - "grad_norm": 2.4356746673583984, - "learning_rate": 4.405340735632997e-05, - "loss": 0.7589, - "step": 40360 - }, - { - "epoch": 0.35688396188051413, - "grad_norm": 1.566416621208191, - "learning_rate": 4.40519339686581e-05, - "loss": 0.7081, - "step": 40370 - }, - { - "epoch": 0.3569723651408264, - "grad_norm": 5.95517110824585, - "learning_rate": 4.405046058098623e-05, - "loss": 0.8114, - "step": 40380 - }, - { - "epoch": 0.35706076840113865, - "grad_norm": 2.393517255783081, - "learning_rate": 4.4048987193314354e-05, - "loss": 0.7004, - "step": 40390 - }, - { - "epoch": 0.3571491716614509, - "grad_norm": 3.452021598815918, - "learning_rate": 4.404751380564249e-05, - "loss": 0.565, - "step": 40400 - }, - { - "epoch": 0.3572375749217631, - "grad_norm": 2.5372750759124756, - "learning_rate": 4.404604041797062e-05, - "loss": 0.7225, - "step": 40410 - }, - { - "epoch": 0.35732597818207534, - "grad_norm": 5.6464338302612305, - "learning_rate": 4.4044567030298746e-05, - "loss": 0.7225, - "step": 40420 - }, - { - "epoch": 0.3574143814423876, - "grad_norm": 4.8920392990112305, - "learning_rate": 4.4043093642626874e-05, - "loss": 0.8245, - "step": 40430 - }, - { - "epoch": 0.35750278470269986, - "grad_norm": 2.4476685523986816, - "learning_rate": 4.404162025495501e-05, - "loss": 0.6622, - "step": 40440 - }, - { - "epoch": 0.3575911879630121, - "grad_norm": 9.860517501831055, - "learning_rate": 4.404014686728313e-05, - "loss": 0.5973, - "step": 40450 - }, - { - "epoch": 0.3576795912233243, - "grad_norm": 2.5402731895446777, - "learning_rate": 4.4038673479611266e-05, - "loss": 0.6907, - "step": 40460 - }, - { - "epoch": 0.35776799448363655, - "grad_norm": 6.171511650085449, - "learning_rate": 4.403720009193939e-05, - "loss": 0.6051, - "step": 40470 - }, - { - "epoch": 0.3578563977439488, - "grad_norm": 1.7023460865020752, - "learning_rate": 4.403572670426752e-05, - "loss": 0.7183, - "step": 40480 - }, - { - "epoch": 0.357944801004261, - "grad_norm": 3.548947811126709, - "learning_rate": 4.403425331659565e-05, - "loss": 0.6988, - "step": 40490 - }, - { - "epoch": 0.3580332042645733, - "grad_norm": 2.4574687480926514, - "learning_rate": 4.403277992892378e-05, - "loss": 0.7046, - "step": 40500 - }, - { - "epoch": 0.35812160752488553, - "grad_norm": 7.232769012451172, - "learning_rate": 4.403130654125191e-05, - "loss": 0.6473, - "step": 40510 - }, - { - "epoch": 0.35821001078519776, - "grad_norm": 10.139755249023438, - "learning_rate": 4.402983315358004e-05, - "loss": 0.6927, - "step": 40520 - }, - { - "epoch": 0.35829841404551, - "grad_norm": 4.592871189117432, - "learning_rate": 4.4028359765908165e-05, - "loss": 0.7979, - "step": 40530 - }, - { - "epoch": 0.3583868173058222, - "grad_norm": 18.107608795166016, - "learning_rate": 4.40268863782363e-05, - "loss": 0.7414, - "step": 40540 - }, - { - "epoch": 0.35847522056613446, - "grad_norm": 2.707333564758301, - "learning_rate": 4.402541299056443e-05, - "loss": 0.6631, - "step": 40550 - }, - { - "epoch": 0.35856362382644674, - "grad_norm": 5.194340229034424, - "learning_rate": 4.402393960289256e-05, - "loss": 0.6022, - "step": 40560 - }, - { - "epoch": 0.358652027086759, - "grad_norm": 1.5801736116409302, - "learning_rate": 4.4022466215220685e-05, - "loss": 0.7492, - "step": 40570 - }, - { - "epoch": 0.3587404303470712, - "grad_norm": 11.096145629882812, - "learning_rate": 4.402099282754882e-05, - "loss": 0.7574, - "step": 40580 - }, - { - "epoch": 0.35882883360738344, - "grad_norm": 3.0222949981689453, - "learning_rate": 4.401951943987694e-05, - "loss": 0.6925, - "step": 40590 - }, - { - "epoch": 0.35891723686769567, - "grad_norm": 3.1919920444488525, - "learning_rate": 4.401804605220508e-05, - "loss": 0.9316, - "step": 40600 - }, - { - "epoch": 0.3590056401280079, - "grad_norm": 1.5551713705062866, - "learning_rate": 4.40165726645332e-05, - "loss": 0.8169, - "step": 40610 - }, - { - "epoch": 0.3590940433883202, - "grad_norm": 6.985397815704346, - "learning_rate": 4.4015099276861334e-05, - "loss": 0.6582, - "step": 40620 - }, - { - "epoch": 0.3591824466486324, - "grad_norm": 1.5592365264892578, - "learning_rate": 4.401362588918946e-05, - "loss": 0.7288, - "step": 40630 - }, - { - "epoch": 0.35927084990894465, - "grad_norm": 6.607183933258057, - "learning_rate": 4.401215250151759e-05, - "loss": 0.7035, - "step": 40640 - }, - { - "epoch": 0.3593592531692569, - "grad_norm": 5.1960368156433105, - "learning_rate": 4.401067911384572e-05, - "loss": 0.8285, - "step": 40650 - }, - { - "epoch": 0.3594476564295691, - "grad_norm": 3.718777656555176, - "learning_rate": 4.4009205726173854e-05, - "loss": 0.6918, - "step": 40660 - }, - { - "epoch": 0.35953605968988134, - "grad_norm": 4.332536220550537, - "learning_rate": 4.4007732338501975e-05, - "loss": 0.8502, - "step": 40670 - }, - { - "epoch": 0.3596244629501936, - "grad_norm": 3.90313720703125, - "learning_rate": 4.400625895083011e-05, - "loss": 0.7566, - "step": 40680 - }, - { - "epoch": 0.35971286621050585, - "grad_norm": 3.1796460151672363, - "learning_rate": 4.400478556315824e-05, - "loss": 0.6631, - "step": 40690 - }, - { - "epoch": 0.3598012694708181, - "grad_norm": 2.104417085647583, - "learning_rate": 4.400331217548637e-05, - "loss": 0.8594, - "step": 40700 - }, - { - "epoch": 0.3598896727311303, - "grad_norm": 8.83961296081543, - "learning_rate": 4.4001838787814495e-05, - "loss": 0.7134, - "step": 40710 - }, - { - "epoch": 0.35997807599144255, - "grad_norm": 9.377950668334961, - "learning_rate": 4.4000365400142624e-05, - "loss": 0.6579, - "step": 40720 - }, - { - "epoch": 0.3600664792517548, - "grad_norm": 5.622836112976074, - "learning_rate": 4.399889201247075e-05, - "loss": 0.6832, - "step": 40730 - }, - { - "epoch": 0.36015488251206706, - "grad_norm": 2.3973677158355713, - "learning_rate": 4.399741862479889e-05, - "loss": 0.6981, - "step": 40740 - }, - { - "epoch": 0.3602432857723793, - "grad_norm": 3.3558311462402344, - "learning_rate": 4.399594523712701e-05, - "loss": 0.7008, - "step": 40750 - }, - { - "epoch": 0.3603316890326915, - "grad_norm": 3.977409601211548, - "learning_rate": 4.3994471849455144e-05, - "loss": 0.794, - "step": 40760 - }, - { - "epoch": 0.36042009229300376, - "grad_norm": 0.9260974526405334, - "learning_rate": 4.399299846178327e-05, - "loss": 0.6739, - "step": 40770 - }, - { - "epoch": 0.360508495553316, - "grad_norm": 2.4397428035736084, - "learning_rate": 4.39915250741114e-05, - "loss": 0.7254, - "step": 40780 - }, - { - "epoch": 0.3605968988136282, - "grad_norm": 3.673962354660034, - "learning_rate": 4.399005168643953e-05, - "loss": 0.7404, - "step": 40790 - }, - { - "epoch": 0.3606853020739405, - "grad_norm": 15.185490608215332, - "learning_rate": 4.3988578298767664e-05, - "loss": 0.7712, - "step": 40800 - }, - { - "epoch": 0.36077370533425274, - "grad_norm": 2.7475123405456543, - "learning_rate": 4.3987104911095786e-05, - "loss": 0.7129, - "step": 40810 - }, - { - "epoch": 0.36086210859456497, - "grad_norm": 4.323202610015869, - "learning_rate": 4.398563152342392e-05, - "loss": 0.6802, - "step": 40820 - }, - { - "epoch": 0.3609505118548772, - "grad_norm": 1.4806162118911743, - "learning_rate": 4.398415813575205e-05, - "loss": 0.7264, - "step": 40830 - }, - { - "epoch": 0.36103891511518943, - "grad_norm": 3.3781514167785645, - "learning_rate": 4.398268474808018e-05, - "loss": 0.6933, - "step": 40840 - }, - { - "epoch": 0.3611273183755017, - "grad_norm": 2.4755170345306396, - "learning_rate": 4.3981211360408306e-05, - "loss": 0.8133, - "step": 40850 - }, - { - "epoch": 0.36121572163581395, - "grad_norm": 6.750313758850098, - "learning_rate": 4.3979737972736434e-05, - "loss": 0.7433, - "step": 40860 - }, - { - "epoch": 0.3613041248961262, - "grad_norm": 10.428040504455566, - "learning_rate": 4.397826458506456e-05, - "loss": 0.6626, - "step": 40870 - }, - { - "epoch": 0.3613925281564384, - "grad_norm": 8.72851848602295, - "learning_rate": 4.39767911973927e-05, - "loss": 0.6704, - "step": 40880 - }, - { - "epoch": 0.36148093141675064, - "grad_norm": 15.175640106201172, - "learning_rate": 4.3975317809720826e-05, - "loss": 0.7522, - "step": 40890 - }, - { - "epoch": 0.36156933467706287, - "grad_norm": 6.841087818145752, - "learning_rate": 4.3973844422048955e-05, - "loss": 0.6681, - "step": 40900 - }, - { - "epoch": 0.36165773793737516, - "grad_norm": 7.627110958099365, - "learning_rate": 4.397237103437708e-05, - "loss": 0.7088, - "step": 40910 - }, - { - "epoch": 0.3617461411976874, - "grad_norm": 1.9037063121795654, - "learning_rate": 4.397089764670521e-05, - "loss": 0.6926, - "step": 40920 - }, - { - "epoch": 0.3618345444579996, - "grad_norm": 4.059687614440918, - "learning_rate": 4.396942425903334e-05, - "loss": 0.67, - "step": 40930 - }, - { - "epoch": 0.36192294771831185, - "grad_norm": 2.2511236667633057, - "learning_rate": 4.396795087136147e-05, - "loss": 0.738, - "step": 40940 - }, - { - "epoch": 0.3620113509786241, - "grad_norm": 2.366767406463623, - "learning_rate": 4.39664774836896e-05, - "loss": 0.6747, - "step": 40950 - }, - { - "epoch": 0.3620997542389363, - "grad_norm": 3.485605239868164, - "learning_rate": 4.396500409601773e-05, - "loss": 0.6978, - "step": 40960 - }, - { - "epoch": 0.3621881574992486, - "grad_norm": 1.3318157196044922, - "learning_rate": 4.396353070834586e-05, - "loss": 0.7764, - "step": 40970 - }, - { - "epoch": 0.36227656075956083, - "grad_norm": 2.444885492324829, - "learning_rate": 4.396205732067399e-05, - "loss": 0.7502, - "step": 40980 - }, - { - "epoch": 0.36236496401987306, - "grad_norm": 11.757702827453613, - "learning_rate": 4.3960583933002116e-05, - "loss": 0.8271, - "step": 40990 - }, - { - "epoch": 0.3624533672801853, - "grad_norm": 3.8691651821136475, - "learning_rate": 4.3959110545330245e-05, - "loss": 0.7425, - "step": 41000 - }, - { - "epoch": 0.3625417705404975, - "grad_norm": 6.782963752746582, - "learning_rate": 4.395763715765838e-05, - "loss": 0.7009, - "step": 41010 - }, - { - "epoch": 0.36263017380080975, - "grad_norm": 8.647643089294434, - "learning_rate": 4.395616376998651e-05, - "loss": 0.689, - "step": 41020 - }, - { - "epoch": 0.36271857706112204, - "grad_norm": 6.964686393737793, - "learning_rate": 4.395469038231464e-05, - "loss": 0.576, - "step": 41030 - }, - { - "epoch": 0.36280698032143427, - "grad_norm": 10.234725952148438, - "learning_rate": 4.3953216994642765e-05, - "loss": 0.6068, - "step": 41040 - }, - { - "epoch": 0.3628953835817465, - "grad_norm": 5.168300151824951, - "learning_rate": 4.3951743606970893e-05, - "loss": 0.8187, - "step": 41050 - }, - { - "epoch": 0.36298378684205873, - "grad_norm": 2.50061297416687, - "learning_rate": 4.395027021929902e-05, - "loss": 0.5756, - "step": 41060 - }, - { - "epoch": 0.36307219010237096, - "grad_norm": 5.854412078857422, - "learning_rate": 4.394879683162716e-05, - "loss": 0.796, - "step": 41070 - }, - { - "epoch": 0.3631605933626832, - "grad_norm": 4.857027053833008, - "learning_rate": 4.394732344395528e-05, - "loss": 0.7482, - "step": 41080 - }, - { - "epoch": 0.3632489966229955, - "grad_norm": 5.145595550537109, - "learning_rate": 4.3945850056283414e-05, - "loss": 0.727, - "step": 41090 - }, - { - "epoch": 0.3633373998833077, - "grad_norm": 4.18808650970459, - "learning_rate": 4.394437666861154e-05, - "loss": 0.7778, - "step": 41100 - }, - { - "epoch": 0.36342580314361994, - "grad_norm": 4.522815227508545, - "learning_rate": 4.394290328093967e-05, - "loss": 0.7866, - "step": 41110 - }, - { - "epoch": 0.36351420640393217, - "grad_norm": 2.089757204055786, - "learning_rate": 4.39414298932678e-05, - "loss": 0.7501, - "step": 41120 - }, - { - "epoch": 0.3636026096642444, - "grad_norm": 14.277029991149902, - "learning_rate": 4.3939956505595934e-05, - "loss": 0.7619, - "step": 41130 - }, - { - "epoch": 0.36369101292455663, - "grad_norm": 2.5681841373443604, - "learning_rate": 4.3938483117924055e-05, - "loss": 0.7376, - "step": 41140 - }, - { - "epoch": 0.3637794161848689, - "grad_norm": 5.73472785949707, - "learning_rate": 4.393700973025219e-05, - "loss": 0.7434, - "step": 41150 - }, - { - "epoch": 0.36386781944518115, - "grad_norm": 2.0774855613708496, - "learning_rate": 4.393553634258032e-05, - "loss": 0.6711, - "step": 41160 - }, - { - "epoch": 0.3639562227054934, - "grad_norm": 1.8315597772598267, - "learning_rate": 4.393406295490845e-05, - "loss": 0.7965, - "step": 41170 - }, - { - "epoch": 0.3640446259658056, - "grad_norm": 1.7054177522659302, - "learning_rate": 4.3932589567236576e-05, - "loss": 0.6805, - "step": 41180 - }, - { - "epoch": 0.36413302922611784, - "grad_norm": 3.2949397563934326, - "learning_rate": 4.3931116179564704e-05, - "loss": 0.7417, - "step": 41190 - }, - { - "epoch": 0.3642214324864301, - "grad_norm": 1.2452337741851807, - "learning_rate": 4.392964279189283e-05, - "loss": 0.5739, - "step": 41200 - }, - { - "epoch": 0.36430983574674236, - "grad_norm": 1.940799355506897, - "learning_rate": 4.392816940422097e-05, - "loss": 0.6658, - "step": 41210 - }, - { - "epoch": 0.3643982390070546, - "grad_norm": 20.6791934967041, - "learning_rate": 4.392669601654909e-05, - "loss": 0.8094, - "step": 41220 - }, - { - "epoch": 0.3644866422673668, - "grad_norm": 3.6161069869995117, - "learning_rate": 4.3925222628877224e-05, - "loss": 0.8835, - "step": 41230 - }, - { - "epoch": 0.36457504552767905, - "grad_norm": 9.129081726074219, - "learning_rate": 4.392374924120535e-05, - "loss": 0.9506, - "step": 41240 - }, - { - "epoch": 0.3646634487879913, - "grad_norm": 2.6817915439605713, - "learning_rate": 4.392227585353348e-05, - "loss": 0.8211, - "step": 41250 - }, - { - "epoch": 0.3647518520483035, - "grad_norm": 3.572265386581421, - "learning_rate": 4.392080246586161e-05, - "loss": 0.7337, - "step": 41260 - }, - { - "epoch": 0.3648402553086158, - "grad_norm": 1.5965874195098877, - "learning_rate": 4.3919329078189744e-05, - "loss": 0.7683, - "step": 41270 - }, - { - "epoch": 0.36492865856892803, - "grad_norm": 6.551940441131592, - "learning_rate": 4.3917855690517866e-05, - "loss": 0.8188, - "step": 41280 - }, - { - "epoch": 0.36501706182924026, - "grad_norm": 5.230619430541992, - "learning_rate": 4.3916382302846e-05, - "loss": 0.6382, - "step": 41290 - }, - { - "epoch": 0.3651054650895525, - "grad_norm": 7.282522678375244, - "learning_rate": 4.391490891517412e-05, - "loss": 0.7625, - "step": 41300 - }, - { - "epoch": 0.3651938683498647, - "grad_norm": 2.7996153831481934, - "learning_rate": 4.391343552750226e-05, - "loss": 0.713, - "step": 41310 - }, - { - "epoch": 0.36528227161017696, - "grad_norm": 3.4378294944763184, - "learning_rate": 4.3911962139830386e-05, - "loss": 0.67, - "step": 41320 - }, - { - "epoch": 0.36537067487048924, - "grad_norm": 6.836731433868408, - "learning_rate": 4.3910488752158514e-05, - "loss": 0.6547, - "step": 41330 - }, - { - "epoch": 0.3654590781308015, - "grad_norm": 6.535715579986572, - "learning_rate": 4.390901536448664e-05, - "loss": 0.7291, - "step": 41340 - }, - { - "epoch": 0.3655474813911137, - "grad_norm": 4.123851299285889, - "learning_rate": 4.390754197681478e-05, - "loss": 0.6429, - "step": 41350 - }, - { - "epoch": 0.36563588465142594, - "grad_norm": 15.508459091186523, - "learning_rate": 4.39060685891429e-05, - "loss": 0.673, - "step": 41360 - }, - { - "epoch": 0.36572428791173817, - "grad_norm": 7.07671594619751, - "learning_rate": 4.3904595201471035e-05, - "loss": 0.6897, - "step": 41370 - }, - { - "epoch": 0.36581269117205045, - "grad_norm": 7.29509162902832, - "learning_rate": 4.390312181379916e-05, - "loss": 0.752, - "step": 41380 - }, - { - "epoch": 0.3659010944323627, - "grad_norm": 3.431602954864502, - "learning_rate": 4.390164842612729e-05, - "loss": 0.6463, - "step": 41390 - }, - { - "epoch": 0.3659894976926749, - "grad_norm": 9.897151947021484, - "learning_rate": 4.390017503845542e-05, - "loss": 0.665, - "step": 41400 - }, - { - "epoch": 0.36607790095298715, - "grad_norm": 6.333930969238281, - "learning_rate": 4.389870165078355e-05, - "loss": 0.8146, - "step": 41410 - }, - { - "epoch": 0.3661663042132994, - "grad_norm": 3.1365952491760254, - "learning_rate": 4.3897228263111676e-05, - "loss": 0.809, - "step": 41420 - }, - { - "epoch": 0.3662547074736116, - "grad_norm": 1.5993609428405762, - "learning_rate": 4.389575487543981e-05, - "loss": 0.6031, - "step": 41430 - }, - { - "epoch": 0.3663431107339239, - "grad_norm": 6.380438327789307, - "learning_rate": 4.389428148776793e-05, - "loss": 0.778, - "step": 41440 - }, - { - "epoch": 0.3664315139942361, - "grad_norm": 14.338891983032227, - "learning_rate": 4.389280810009607e-05, - "loss": 0.6802, - "step": 41450 - }, - { - "epoch": 0.36651991725454836, - "grad_norm": 2.1834185123443604, - "learning_rate": 4.3891334712424197e-05, - "loss": 0.7517, - "step": 41460 - }, - { - "epoch": 0.3666083205148606, - "grad_norm": 6.721607208251953, - "learning_rate": 4.3889861324752325e-05, - "loss": 0.8268, - "step": 41470 - }, - { - "epoch": 0.3666967237751728, - "grad_norm": 7.450668811798096, - "learning_rate": 4.388838793708045e-05, - "loss": 0.9185, - "step": 41480 - }, - { - "epoch": 0.36678512703548505, - "grad_norm": 8.780129432678223, - "learning_rate": 4.388691454940859e-05, - "loss": 0.7457, - "step": 41490 - }, - { - "epoch": 0.36687353029579733, - "grad_norm": 1.755095362663269, - "learning_rate": 4.388544116173671e-05, - "loss": 0.6655, - "step": 41500 - }, - { - "epoch": 0.36696193355610957, - "grad_norm": 1.4471567869186401, - "learning_rate": 4.3883967774064845e-05, - "loss": 0.7387, - "step": 41510 - }, - { - "epoch": 0.3670503368164218, - "grad_norm": 3.916208267211914, - "learning_rate": 4.3882494386392973e-05, - "loss": 0.8674, - "step": 41520 - }, - { - "epoch": 0.367138740076734, - "grad_norm": 3.033754348754883, - "learning_rate": 4.38810209987211e-05, - "loss": 0.8175, - "step": 41530 - }, - { - "epoch": 0.36722714333704626, - "grad_norm": 2.02097487449646, - "learning_rate": 4.387954761104923e-05, - "loss": 0.8189, - "step": 41540 - }, - { - "epoch": 0.3673155465973585, - "grad_norm": 5.735281467437744, - "learning_rate": 4.387807422337736e-05, - "loss": 0.6936, - "step": 41550 - }, - { - "epoch": 0.3674039498576708, - "grad_norm": 4.488494396209717, - "learning_rate": 4.387660083570549e-05, - "loss": 0.7135, - "step": 41560 - }, - { - "epoch": 0.367492353117983, - "grad_norm": 1.9440511465072632, - "learning_rate": 4.387512744803362e-05, - "loss": 0.6788, - "step": 41570 - }, - { - "epoch": 0.36758075637829524, - "grad_norm": 2.082098960876465, - "learning_rate": 4.3873654060361744e-05, - "loss": 0.7994, - "step": 41580 - }, - { - "epoch": 0.36766915963860747, - "grad_norm": 5.62264347076416, - "learning_rate": 4.387218067268988e-05, - "loss": 0.6692, - "step": 41590 - }, - { - "epoch": 0.3677575628989197, - "grad_norm": 2.6430423259735107, - "learning_rate": 4.387070728501801e-05, - "loss": 0.8584, - "step": 41600 - }, - { - "epoch": 0.36784596615923193, - "grad_norm": 13.46376895904541, - "learning_rate": 4.3869233897346135e-05, - "loss": 0.7638, - "step": 41610 - }, - { - "epoch": 0.3679343694195442, - "grad_norm": 5.512721061706543, - "learning_rate": 4.3867760509674264e-05, - "loss": 0.6937, - "step": 41620 - }, - { - "epoch": 0.36802277267985645, - "grad_norm": 2.6101555824279785, - "learning_rate": 4.38662871220024e-05, - "loss": 0.7419, - "step": 41630 - }, - { - "epoch": 0.3681111759401687, - "grad_norm": 9.809114456176758, - "learning_rate": 4.386481373433052e-05, - "loss": 0.7475, - "step": 41640 - }, - { - "epoch": 0.3681995792004809, - "grad_norm": 2.038137435913086, - "learning_rate": 4.3863340346658656e-05, - "loss": 0.7122, - "step": 41650 - }, - { - "epoch": 0.36828798246079314, - "grad_norm": 17.743118286132812, - "learning_rate": 4.386186695898678e-05, - "loss": 0.7984, - "step": 41660 - }, - { - "epoch": 0.36837638572110537, - "grad_norm": 11.9349365234375, - "learning_rate": 4.386039357131491e-05, - "loss": 0.8233, - "step": 41670 - }, - { - "epoch": 0.36846478898141766, - "grad_norm": 3.9197232723236084, - "learning_rate": 4.385892018364304e-05, - "loss": 0.6308, - "step": 41680 - }, - { - "epoch": 0.3685531922417299, - "grad_norm": 8.652718544006348, - "learning_rate": 4.385744679597117e-05, - "loss": 0.7744, - "step": 41690 - }, - { - "epoch": 0.3686415955020421, - "grad_norm": 2.7132115364074707, - "learning_rate": 4.38559734082993e-05, - "loss": 0.8058, - "step": 41700 - }, - { - "epoch": 0.36872999876235435, - "grad_norm": 16.54650115966797, - "learning_rate": 4.385450002062743e-05, - "loss": 0.7016, - "step": 41710 - }, - { - "epoch": 0.3688184020226666, - "grad_norm": 10.114154815673828, - "learning_rate": 4.3853026632955554e-05, - "loss": 0.6669, - "step": 41720 - }, - { - "epoch": 0.3689068052829788, - "grad_norm": 4.78609037399292, - "learning_rate": 4.385155324528369e-05, - "loss": 0.7174, - "step": 41730 - }, - { - "epoch": 0.3689952085432911, - "grad_norm": 5.867161273956299, - "learning_rate": 4.385007985761182e-05, - "loss": 0.7428, - "step": 41740 - }, - { - "epoch": 0.36908361180360333, - "grad_norm": 3.208101511001587, - "learning_rate": 4.3848606469939946e-05, - "loss": 0.7772, - "step": 41750 - }, - { - "epoch": 0.36917201506391556, - "grad_norm": 1.9551610946655273, - "learning_rate": 4.3847133082268074e-05, - "loss": 0.7183, - "step": 41760 - }, - { - "epoch": 0.3692604183242278, - "grad_norm": 1.4658784866333008, - "learning_rate": 4.38456596945962e-05, - "loss": 0.7232, - "step": 41770 - }, - { - "epoch": 0.36934882158454, - "grad_norm": 3.3785741329193115, - "learning_rate": 4.384418630692433e-05, - "loss": 0.7762, - "step": 41780 - }, - { - "epoch": 0.36943722484485225, - "grad_norm": 4.0606465339660645, - "learning_rate": 4.3842712919252466e-05, - "loss": 0.6346, - "step": 41790 - }, - { - "epoch": 0.36952562810516454, - "grad_norm": 10.871868133544922, - "learning_rate": 4.3841239531580594e-05, - "loss": 0.6976, - "step": 41800 - }, - { - "epoch": 0.36961403136547677, - "grad_norm": 7.123082160949707, - "learning_rate": 4.383976614390872e-05, - "loss": 0.7326, - "step": 41810 - }, - { - "epoch": 0.369702434625789, - "grad_norm": 7.928825378417969, - "learning_rate": 4.383829275623685e-05, - "loss": 0.6402, - "step": 41820 - }, - { - "epoch": 0.36979083788610123, - "grad_norm": 1.1369304656982422, - "learning_rate": 4.383681936856498e-05, - "loss": 0.5528, - "step": 41830 - }, - { - "epoch": 0.36987924114641346, - "grad_norm": 1.3517223596572876, - "learning_rate": 4.383534598089311e-05, - "loss": 0.7826, - "step": 41840 - }, - { - "epoch": 0.3699676444067257, - "grad_norm": 18.013916015625, - "learning_rate": 4.383387259322124e-05, - "loss": 0.6546, - "step": 41850 - }, - { - "epoch": 0.370056047667038, - "grad_norm": 2.3677890300750732, - "learning_rate": 4.383239920554937e-05, - "loss": 0.6548, - "step": 41860 - }, - { - "epoch": 0.3701444509273502, - "grad_norm": 5.211153507232666, - "learning_rate": 4.38309258178775e-05, - "loss": 0.8121, - "step": 41870 - }, - { - "epoch": 0.37023285418766244, - "grad_norm": 2.7310221195220947, - "learning_rate": 4.382945243020563e-05, - "loss": 0.6616, - "step": 41880 - }, - { - "epoch": 0.37032125744797467, - "grad_norm": 4.248507976531982, - "learning_rate": 4.3827979042533756e-05, - "loss": 0.742, - "step": 41890 - }, - { - "epoch": 0.3704096607082869, - "grad_norm": 3.743333578109741, - "learning_rate": 4.3826505654861885e-05, - "loss": 0.7965, - "step": 41900 - }, - { - "epoch": 0.3704980639685992, - "grad_norm": 6.355827808380127, - "learning_rate": 4.382503226719001e-05, - "loss": 0.7357, - "step": 41910 - }, - { - "epoch": 0.3705864672289114, - "grad_norm": 3.2447757720947266, - "learning_rate": 4.382355887951815e-05, - "loss": 0.6423, - "step": 41920 - }, - { - "epoch": 0.37067487048922365, - "grad_norm": 6.498961925506592, - "learning_rate": 4.382208549184628e-05, - "loss": 0.6671, - "step": 41930 - }, - { - "epoch": 0.3707632737495359, - "grad_norm": 3.632401466369629, - "learning_rate": 4.3820612104174405e-05, - "loss": 0.7282, - "step": 41940 - }, - { - "epoch": 0.3708516770098481, - "grad_norm": 4.138214111328125, - "learning_rate": 4.381913871650253e-05, - "loss": 0.7385, - "step": 41950 - }, - { - "epoch": 0.37094008027016034, - "grad_norm": 10.882044792175293, - "learning_rate": 4.381766532883066e-05, - "loss": 0.7503, - "step": 41960 - }, - { - "epoch": 0.37102848353047263, - "grad_norm": 2.232042074203491, - "learning_rate": 4.381619194115879e-05, - "loss": 0.5926, - "step": 41970 - }, - { - "epoch": 0.37111688679078486, - "grad_norm": 9.09072494506836, - "learning_rate": 4.3814718553486925e-05, - "loss": 0.7787, - "step": 41980 - }, - { - "epoch": 0.3712052900510971, - "grad_norm": 2.8556175231933594, - "learning_rate": 4.3813245165815054e-05, - "loss": 0.8473, - "step": 41990 - }, - { - "epoch": 0.3712936933114093, - "grad_norm": 4.36794900894165, - "learning_rate": 4.381177177814318e-05, - "loss": 0.7505, - "step": 42000 - }, - { - "epoch": 0.37138209657172155, - "grad_norm": 3.274106740951538, - "learning_rate": 4.381029839047131e-05, - "loss": 0.6209, - "step": 42010 - }, - { - "epoch": 0.3714704998320338, - "grad_norm": 7.524702072143555, - "learning_rate": 4.380882500279944e-05, - "loss": 0.8172, - "step": 42020 - }, - { - "epoch": 0.37155890309234607, - "grad_norm": 3.5206210613250732, - "learning_rate": 4.380735161512757e-05, - "loss": 0.6482, - "step": 42030 - }, - { - "epoch": 0.3716473063526583, - "grad_norm": 1.3513615131378174, - "learning_rate": 4.38058782274557e-05, - "loss": 0.8467, - "step": 42040 - }, - { - "epoch": 0.37173570961297053, - "grad_norm": 2.57810640335083, - "learning_rate": 4.3804404839783824e-05, - "loss": 0.7536, - "step": 42050 - }, - { - "epoch": 0.37182411287328276, - "grad_norm": 10.607290267944336, - "learning_rate": 4.380293145211196e-05, - "loss": 0.775, - "step": 42060 - }, - { - "epoch": 0.371912516133595, - "grad_norm": 8.072117805480957, - "learning_rate": 4.380145806444009e-05, - "loss": 0.7045, - "step": 42070 - }, - { - "epoch": 0.3720009193939072, - "grad_norm": 4.225197792053223, - "learning_rate": 4.3799984676768216e-05, - "loss": 0.6823, - "step": 42080 - }, - { - "epoch": 0.3720893226542195, - "grad_norm": 5.244751930236816, - "learning_rate": 4.3798511289096344e-05, - "loss": 0.7275, - "step": 42090 - }, - { - "epoch": 0.37217772591453174, - "grad_norm": 1.7465792894363403, - "learning_rate": 4.379703790142448e-05, - "loss": 0.5684, - "step": 42100 - }, - { - "epoch": 0.372266129174844, - "grad_norm": 6.863415241241455, - "learning_rate": 4.37955645137526e-05, - "loss": 0.6789, - "step": 42110 - }, - { - "epoch": 0.3723545324351562, - "grad_norm": 1.4748578071594238, - "learning_rate": 4.3794091126080736e-05, - "loss": 0.8776, - "step": 42120 - }, - { - "epoch": 0.37244293569546844, - "grad_norm": 8.206558227539062, - "learning_rate": 4.379261773840886e-05, - "loss": 0.6086, - "step": 42130 - }, - { - "epoch": 0.37253133895578067, - "grad_norm": 4.347940921783447, - "learning_rate": 4.379114435073699e-05, - "loss": 0.8139, - "step": 42140 - }, - { - "epoch": 0.37261974221609295, - "grad_norm": 2.5444750785827637, - "learning_rate": 4.378967096306512e-05, - "loss": 0.6933, - "step": 42150 - }, - { - "epoch": 0.3727081454764052, - "grad_norm": 3.0841615200042725, - "learning_rate": 4.378819757539325e-05, - "loss": 0.6849, - "step": 42160 - }, - { - "epoch": 0.3727965487367174, - "grad_norm": 11.621058464050293, - "learning_rate": 4.378672418772138e-05, - "loss": 0.7994, - "step": 42170 - }, - { - "epoch": 0.37288495199702965, - "grad_norm": 7.347725868225098, - "learning_rate": 4.378525080004951e-05, - "loss": 0.6785, - "step": 42180 - }, - { - "epoch": 0.3729733552573419, - "grad_norm": 4.26618766784668, - "learning_rate": 4.3783777412377634e-05, - "loss": 0.7254, - "step": 42190 - }, - { - "epoch": 0.3730617585176541, - "grad_norm": 5.139187812805176, - "learning_rate": 4.378230402470577e-05, - "loss": 0.6932, - "step": 42200 - }, - { - "epoch": 0.3731501617779664, - "grad_norm": 2.561830759048462, - "learning_rate": 4.37808306370339e-05, - "loss": 0.7827, - "step": 42210 - }, - { - "epoch": 0.3732385650382786, - "grad_norm": 8.85420036315918, - "learning_rate": 4.3779357249362026e-05, - "loss": 0.7687, - "step": 42220 - }, - { - "epoch": 0.37332696829859086, - "grad_norm": 3.529747486114502, - "learning_rate": 4.3777883861690154e-05, - "loss": 0.6549, - "step": 42230 - }, - { - "epoch": 0.3734153715589031, - "grad_norm": 3.643594741821289, - "learning_rate": 4.377641047401828e-05, - "loss": 0.8189, - "step": 42240 - }, - { - "epoch": 0.3735037748192153, - "grad_norm": 8.124227523803711, - "learning_rate": 4.377493708634641e-05, - "loss": 0.7284, - "step": 42250 - }, - { - "epoch": 0.37359217807952755, - "grad_norm": 6.099574565887451, - "learning_rate": 4.3773463698674546e-05, - "loss": 0.8366, - "step": 42260 - }, - { - "epoch": 0.37368058133983983, - "grad_norm": 1.4467730522155762, - "learning_rate": 4.377199031100267e-05, - "loss": 0.6122, - "step": 42270 - }, - { - "epoch": 0.37376898460015207, - "grad_norm": 3.4916584491729736, - "learning_rate": 4.37705169233308e-05, - "loss": 0.6087, - "step": 42280 - }, - { - "epoch": 0.3738573878604643, - "grad_norm": 8.590494155883789, - "learning_rate": 4.376904353565893e-05, - "loss": 0.7862, - "step": 42290 - }, - { - "epoch": 0.3739457911207765, - "grad_norm": 1.360739827156067, - "learning_rate": 4.376757014798706e-05, - "loss": 0.6658, - "step": 42300 - }, - { - "epoch": 0.37403419438108876, - "grad_norm": 8.499696731567383, - "learning_rate": 4.376609676031519e-05, - "loss": 0.6978, - "step": 42310 - }, - { - "epoch": 0.374122597641401, - "grad_norm": 4.420283794403076, - "learning_rate": 4.376462337264332e-05, - "loss": 0.7048, - "step": 42320 - }, - { - "epoch": 0.3742110009017133, - "grad_norm": 7.824456214904785, - "learning_rate": 4.3763149984971445e-05, - "loss": 0.6504, - "step": 42330 - }, - { - "epoch": 0.3742994041620255, - "grad_norm": 8.62056827545166, - "learning_rate": 4.376167659729958e-05, - "loss": 0.9202, - "step": 42340 - }, - { - "epoch": 0.37438780742233774, - "grad_norm": 1.988431453704834, - "learning_rate": 4.376020320962771e-05, - "loss": 0.6392, - "step": 42350 - }, - { - "epoch": 0.37447621068264997, - "grad_norm": 2.344813346862793, - "learning_rate": 4.3758729821955837e-05, - "loss": 0.6795, - "step": 42360 - }, - { - "epoch": 0.3745646139429622, - "grad_norm": 3.136532783508301, - "learning_rate": 4.3757256434283965e-05, - "loss": 0.796, - "step": 42370 - }, - { - "epoch": 0.37465301720327443, - "grad_norm": 2.549125909805298, - "learning_rate": 4.375578304661209e-05, - "loss": 0.6904, - "step": 42380 - }, - { - "epoch": 0.3747414204635867, - "grad_norm": 6.721037864685059, - "learning_rate": 4.375430965894022e-05, - "loss": 0.6246, - "step": 42390 - }, - { - "epoch": 0.37482982372389895, - "grad_norm": 13.33173942565918, - "learning_rate": 4.375283627126836e-05, - "loss": 0.6006, - "step": 42400 - }, - { - "epoch": 0.3749182269842112, - "grad_norm": 2.099210739135742, - "learning_rate": 4.375136288359648e-05, - "loss": 0.7107, - "step": 42410 - }, - { - "epoch": 0.3750066302445234, - "grad_norm": 7.2005743980407715, - "learning_rate": 4.3749889495924613e-05, - "loss": 0.8144, - "step": 42420 - }, - { - "epoch": 0.37509503350483564, - "grad_norm": 1.8392319679260254, - "learning_rate": 4.374841610825274e-05, - "loss": 0.7128, - "step": 42430 - }, - { - "epoch": 0.3751834367651479, - "grad_norm": 2.1908111572265625, - "learning_rate": 4.374694272058087e-05, - "loss": 0.7567, - "step": 42440 - }, - { - "epoch": 0.37527184002546016, - "grad_norm": 1.7751134634017944, - "learning_rate": 4.3745469332909e-05, - "loss": 0.5544, - "step": 42450 - }, - { - "epoch": 0.3753602432857724, - "grad_norm": 8.485166549682617, - "learning_rate": 4.3743995945237134e-05, - "loss": 0.7592, - "step": 42460 - }, - { - "epoch": 0.3754486465460846, - "grad_norm": 6.904178142547607, - "learning_rate": 4.3742522557565255e-05, - "loss": 0.7997, - "step": 42470 - }, - { - "epoch": 0.37553704980639685, - "grad_norm": 8.167841911315918, - "learning_rate": 4.374104916989339e-05, - "loss": 0.8538, - "step": 42480 - }, - { - "epoch": 0.3756254530667091, - "grad_norm": 4.086103439331055, - "learning_rate": 4.373957578222151e-05, - "loss": 0.7311, - "step": 42490 - }, - { - "epoch": 0.37571385632702137, - "grad_norm": 4.62849760055542, - "learning_rate": 4.373810239454965e-05, - "loss": 0.5907, - "step": 42500 - }, - { - "epoch": 0.3758022595873336, - "grad_norm": 2.8434784412384033, - "learning_rate": 4.3736629006877775e-05, - "loss": 0.6638, - "step": 42510 - }, - { - "epoch": 0.37589066284764583, - "grad_norm": 1.8806405067443848, - "learning_rate": 4.3735155619205904e-05, - "loss": 0.772, - "step": 42520 - }, - { - "epoch": 0.37597906610795806, - "grad_norm": 4.3698506355285645, - "learning_rate": 4.373368223153403e-05, - "loss": 0.8148, - "step": 42530 - }, - { - "epoch": 0.3760674693682703, - "grad_norm": 7.582542896270752, - "learning_rate": 4.373220884386217e-05, - "loss": 0.7863, - "step": 42540 - }, - { - "epoch": 0.3761558726285825, - "grad_norm": 1.7512024641036987, - "learning_rate": 4.373073545619029e-05, - "loss": 0.7152, - "step": 42550 - }, - { - "epoch": 0.3762442758888948, - "grad_norm": 4.376026153564453, - "learning_rate": 4.3729262068518424e-05, - "loss": 0.7138, - "step": 42560 - }, - { - "epoch": 0.37633267914920704, - "grad_norm": 3.759315252304077, - "learning_rate": 4.372778868084655e-05, - "loss": 0.7308, - "step": 42570 - }, - { - "epoch": 0.37642108240951927, - "grad_norm": 6.476296424865723, - "learning_rate": 4.372631529317468e-05, - "loss": 0.624, - "step": 42580 - }, - { - "epoch": 0.3765094856698315, - "grad_norm": 6.2172532081604, - "learning_rate": 4.372484190550281e-05, - "loss": 0.7558, - "step": 42590 - }, - { - "epoch": 0.37659788893014373, - "grad_norm": 4.6294074058532715, - "learning_rate": 4.372336851783094e-05, - "loss": 0.8166, - "step": 42600 - }, - { - "epoch": 0.37668629219045596, - "grad_norm": 5.101017951965332, - "learning_rate": 4.3721895130159066e-05, - "loss": 0.6992, - "step": 42610 - }, - { - "epoch": 0.37677469545076825, - "grad_norm": 3.605889320373535, - "learning_rate": 4.37204217424872e-05, - "loss": 0.7652, - "step": 42620 - }, - { - "epoch": 0.3768630987110805, - "grad_norm": 17.48322868347168, - "learning_rate": 4.371894835481532e-05, - "loss": 0.7787, - "step": 42630 - }, - { - "epoch": 0.3769515019713927, - "grad_norm": 2.6086907386779785, - "learning_rate": 4.371747496714346e-05, - "loss": 0.7104, - "step": 42640 - }, - { - "epoch": 0.37703990523170494, - "grad_norm": 5.691768169403076, - "learning_rate": 4.3716001579471586e-05, - "loss": 0.8355, - "step": 42650 - }, - { - "epoch": 0.3771283084920172, - "grad_norm": 3.5036377906799316, - "learning_rate": 4.3714528191799714e-05, - "loss": 0.7362, - "step": 42660 - }, - { - "epoch": 0.3772167117523294, - "grad_norm": 7.328939914703369, - "learning_rate": 4.371305480412784e-05, - "loss": 0.7096, - "step": 42670 - }, - { - "epoch": 0.3773051150126417, - "grad_norm": 1.5260515213012695, - "learning_rate": 4.371158141645598e-05, - "loss": 0.6667, - "step": 42680 - }, - { - "epoch": 0.3773935182729539, - "grad_norm": 9.549139976501465, - "learning_rate": 4.37101080287841e-05, - "loss": 0.7302, - "step": 42690 - }, - { - "epoch": 0.37748192153326615, - "grad_norm": 6.921221733093262, - "learning_rate": 4.3708634641112234e-05, - "loss": 0.6683, - "step": 42700 - }, - { - "epoch": 0.3775703247935784, - "grad_norm": 1.72034752368927, - "learning_rate": 4.370716125344036e-05, - "loss": 0.6454, - "step": 42710 - }, - { - "epoch": 0.3776587280538906, - "grad_norm": 2.3333041667938232, - "learning_rate": 4.370568786576849e-05, - "loss": 0.627, - "step": 42720 - }, - { - "epoch": 0.37774713131420284, - "grad_norm": 2.242698907852173, - "learning_rate": 4.370421447809662e-05, - "loss": 0.811, - "step": 42730 - }, - { - "epoch": 0.37783553457451513, - "grad_norm": 2.7384488582611084, - "learning_rate": 4.370274109042475e-05, - "loss": 0.787, - "step": 42740 - }, - { - "epoch": 0.37792393783482736, - "grad_norm": 2.396437644958496, - "learning_rate": 4.3701267702752876e-05, - "loss": 0.7265, - "step": 42750 - }, - { - "epoch": 0.3780123410951396, - "grad_norm": 2.599555730819702, - "learning_rate": 4.369979431508101e-05, - "loss": 0.6787, - "step": 42760 - }, - { - "epoch": 0.3781007443554518, - "grad_norm": 9.896321296691895, - "learning_rate": 4.369832092740914e-05, - "loss": 0.74, - "step": 42770 - }, - { - "epoch": 0.37818914761576405, - "grad_norm": 4.279694080352783, - "learning_rate": 4.369684753973727e-05, - "loss": 0.896, - "step": 42780 - }, - { - "epoch": 0.3782775508760763, - "grad_norm": 4.9580078125, - "learning_rate": 4.3695374152065396e-05, - "loss": 0.7434, - "step": 42790 - }, - { - "epoch": 0.37836595413638857, - "grad_norm": 5.293226718902588, - "learning_rate": 4.3693900764393525e-05, - "loss": 0.6834, - "step": 42800 - }, - { - "epoch": 0.3784543573967008, - "grad_norm": 1.704933762550354, - "learning_rate": 4.369242737672165e-05, - "loss": 0.7799, - "step": 42810 - }, - { - "epoch": 0.37854276065701303, - "grad_norm": 10.47739315032959, - "learning_rate": 4.369095398904979e-05, - "loss": 0.6782, - "step": 42820 - }, - { - "epoch": 0.37863116391732526, - "grad_norm": 5.8227996826171875, - "learning_rate": 4.3689480601377917e-05, - "loss": 0.5866, - "step": 42830 - }, - { - "epoch": 0.3787195671776375, - "grad_norm": 3.2358522415161133, - "learning_rate": 4.3688007213706045e-05, - "loss": 0.7726, - "step": 42840 - }, - { - "epoch": 0.3788079704379497, - "grad_norm": 2.994148015975952, - "learning_rate": 4.368653382603417e-05, - "loss": 0.6556, - "step": 42850 - }, - { - "epoch": 0.378896373698262, - "grad_norm": 4.2836785316467285, - "learning_rate": 4.36850604383623e-05, - "loss": 0.7697, - "step": 42860 - }, - { - "epoch": 0.37898477695857424, - "grad_norm": 5.644418239593506, - "learning_rate": 4.368358705069043e-05, - "loss": 0.6793, - "step": 42870 - }, - { - "epoch": 0.3790731802188865, - "grad_norm": 2.221799612045288, - "learning_rate": 4.368211366301856e-05, - "loss": 0.6692, - "step": 42880 - }, - { - "epoch": 0.3791615834791987, - "grad_norm": 14.580803871154785, - "learning_rate": 4.3680640275346693e-05, - "loss": 0.8032, - "step": 42890 - }, - { - "epoch": 0.37924998673951094, - "grad_norm": 5.326828956604004, - "learning_rate": 4.367916688767482e-05, - "loss": 0.7324, - "step": 42900 - }, - { - "epoch": 0.37933838999982317, - "grad_norm": 1.2381949424743652, - "learning_rate": 4.367769350000295e-05, - "loss": 0.616, - "step": 42910 - }, - { - "epoch": 0.37942679326013545, - "grad_norm": 3.5227928161621094, - "learning_rate": 4.367622011233108e-05, - "loss": 0.7771, - "step": 42920 - }, - { - "epoch": 0.3795151965204477, - "grad_norm": 6.06992769241333, - "learning_rate": 4.367474672465921e-05, - "loss": 0.6412, - "step": 42930 - }, - { - "epoch": 0.3796035997807599, - "grad_norm": 2.6467907428741455, - "learning_rate": 4.3673273336987335e-05, - "loss": 0.7874, - "step": 42940 - }, - { - "epoch": 0.37969200304107215, - "grad_norm": 9.846500396728516, - "learning_rate": 4.367179994931547e-05, - "loss": 0.7751, - "step": 42950 - }, - { - "epoch": 0.3797804063013844, - "grad_norm": 5.769461154937744, - "learning_rate": 4.367032656164359e-05, - "loss": 0.8085, - "step": 42960 - }, - { - "epoch": 0.37986880956169666, - "grad_norm": 2.9866957664489746, - "learning_rate": 4.366885317397173e-05, - "loss": 0.7827, - "step": 42970 - }, - { - "epoch": 0.3799572128220089, - "grad_norm": 2.3471596240997314, - "learning_rate": 4.3667379786299855e-05, - "loss": 0.6464, - "step": 42980 - }, - { - "epoch": 0.3800456160823211, - "grad_norm": 3.230699300765991, - "learning_rate": 4.3665906398627984e-05, - "loss": 0.798, - "step": 42990 - }, - { - "epoch": 0.38013401934263336, - "grad_norm": 4.558084011077881, - "learning_rate": 4.366443301095611e-05, - "loss": 0.8389, - "step": 43000 - }, - { - "epoch": 0.3802224226029456, - "grad_norm": 3.927190065383911, - "learning_rate": 4.366295962328425e-05, - "loss": 0.7066, - "step": 43010 - }, - { - "epoch": 0.3803108258632578, - "grad_norm": 6.640660762786865, - "learning_rate": 4.366148623561237e-05, - "loss": 0.8037, - "step": 43020 - }, - { - "epoch": 0.3803992291235701, - "grad_norm": 1.7032018899917603, - "learning_rate": 4.3660012847940504e-05, - "loss": 0.8016, - "step": 43030 - }, - { - "epoch": 0.38048763238388233, - "grad_norm": 3.529633045196533, - "learning_rate": 4.365853946026863e-05, - "loss": 0.8302, - "step": 43040 - }, - { - "epoch": 0.38057603564419457, - "grad_norm": 8.706425666809082, - "learning_rate": 4.365706607259676e-05, - "loss": 0.7552, - "step": 43050 - }, - { - "epoch": 0.3806644389045068, - "grad_norm": 6.448954105377197, - "learning_rate": 4.365559268492489e-05, - "loss": 0.7492, - "step": 43060 - }, - { - "epoch": 0.380752842164819, - "grad_norm": 2.459713935852051, - "learning_rate": 4.365411929725302e-05, - "loss": 0.7489, - "step": 43070 - }, - { - "epoch": 0.38084124542513126, - "grad_norm": 2.4048988819122314, - "learning_rate": 4.3652645909581146e-05, - "loss": 0.7565, - "step": 43080 - }, - { - "epoch": 0.38092964868544354, - "grad_norm": 8.155169486999512, - "learning_rate": 4.365117252190928e-05, - "loss": 0.7606, - "step": 43090 - }, - { - "epoch": 0.3810180519457558, - "grad_norm": 3.0469725131988525, - "learning_rate": 4.36496991342374e-05, - "loss": 0.821, - "step": 43100 - }, - { - "epoch": 0.381106455206068, - "grad_norm": 4.180539608001709, - "learning_rate": 4.364822574656554e-05, - "loss": 0.6277, - "step": 43110 - }, - { - "epoch": 0.38119485846638024, - "grad_norm": 3.0767343044281006, - "learning_rate": 4.3646752358893666e-05, - "loss": 0.8115, - "step": 43120 - }, - { - "epoch": 0.38128326172669247, - "grad_norm": 3.867610216140747, - "learning_rate": 4.3645278971221794e-05, - "loss": 0.5614, - "step": 43130 - }, - { - "epoch": 0.3813716649870047, - "grad_norm": 3.3509323596954346, - "learning_rate": 4.364380558354992e-05, - "loss": 0.7551, - "step": 43140 - }, - { - "epoch": 0.381460068247317, - "grad_norm": 2.767132043838501, - "learning_rate": 4.364233219587806e-05, - "loss": 0.7499, - "step": 43150 - }, - { - "epoch": 0.3815484715076292, - "grad_norm": 4.620316505432129, - "learning_rate": 4.364085880820618e-05, - "loss": 0.6625, - "step": 43160 - }, - { - "epoch": 0.38163687476794145, - "grad_norm": 5.942996501922607, - "learning_rate": 4.3639385420534315e-05, - "loss": 0.6448, - "step": 43170 - }, - { - "epoch": 0.3817252780282537, - "grad_norm": 7.085754871368408, - "learning_rate": 4.3637912032862436e-05, - "loss": 0.7338, - "step": 43180 - }, - { - "epoch": 0.3818136812885659, - "grad_norm": 1.9270126819610596, - "learning_rate": 4.363643864519057e-05, - "loss": 0.7296, - "step": 43190 - }, - { - "epoch": 0.38190208454887814, - "grad_norm": 4.797279357910156, - "learning_rate": 4.36349652575187e-05, - "loss": 0.7614, - "step": 43200 - }, - { - "epoch": 0.3819904878091904, - "grad_norm": 4.248345851898193, - "learning_rate": 4.363349186984683e-05, - "loss": 0.7257, - "step": 43210 - }, - { - "epoch": 0.38207889106950266, - "grad_norm": 4.137569427490234, - "learning_rate": 4.3632018482174956e-05, - "loss": 0.6503, - "step": 43220 - }, - { - "epoch": 0.3821672943298149, - "grad_norm": 2.1250267028808594, - "learning_rate": 4.363054509450309e-05, - "loss": 0.7874, - "step": 43230 - }, - { - "epoch": 0.3822556975901271, - "grad_norm": 2.3361072540283203, - "learning_rate": 4.362907170683121e-05, - "loss": 0.7408, - "step": 43240 - }, - { - "epoch": 0.38234410085043935, - "grad_norm": 10.690939903259277, - "learning_rate": 4.362759831915935e-05, - "loss": 0.8218, - "step": 43250 - }, - { - "epoch": 0.3824325041107516, - "grad_norm": 5.73743200302124, - "learning_rate": 4.3626124931487476e-05, - "loss": 0.8849, - "step": 43260 - }, - { - "epoch": 0.38252090737106387, - "grad_norm": 8.772205352783203, - "learning_rate": 4.3624651543815605e-05, - "loss": 0.7937, - "step": 43270 - }, - { - "epoch": 0.3826093106313761, - "grad_norm": 5.444756031036377, - "learning_rate": 4.362317815614373e-05, - "loss": 0.7493, - "step": 43280 - }, - { - "epoch": 0.38269771389168833, - "grad_norm": 2.365267038345337, - "learning_rate": 4.362170476847187e-05, - "loss": 0.8066, - "step": 43290 - }, - { - "epoch": 0.38278611715200056, - "grad_norm": 5.630627632141113, - "learning_rate": 4.362023138079999e-05, - "loss": 0.7531, - "step": 43300 - }, - { - "epoch": 0.3828745204123128, - "grad_norm": 3.3397905826568604, - "learning_rate": 4.3618757993128125e-05, - "loss": 0.7223, - "step": 43310 - }, - { - "epoch": 0.382962923672625, - "grad_norm": 7.111069202423096, - "learning_rate": 4.3617284605456247e-05, - "loss": 0.6533, - "step": 43320 - }, - { - "epoch": 0.3830513269329373, - "grad_norm": 2.439441680908203, - "learning_rate": 4.361581121778438e-05, - "loss": 0.7964, - "step": 43330 - }, - { - "epoch": 0.38313973019324954, - "grad_norm": 5.555276870727539, - "learning_rate": 4.361433783011251e-05, - "loss": 0.6576, - "step": 43340 - }, - { - "epoch": 0.38322813345356177, - "grad_norm": 8.322966575622559, - "learning_rate": 4.361286444244064e-05, - "loss": 0.7967, - "step": 43350 - }, - { - "epoch": 0.383316536713874, - "grad_norm": 15.328668594360352, - "learning_rate": 4.361139105476877e-05, - "loss": 0.6977, - "step": 43360 - }, - { - "epoch": 0.38340493997418623, - "grad_norm": 9.367509841918945, - "learning_rate": 4.36099176670969e-05, - "loss": 0.7085, - "step": 43370 - }, - { - "epoch": 0.38349334323449846, - "grad_norm": 8.669416427612305, - "learning_rate": 4.3608444279425023e-05, - "loss": 0.7451, - "step": 43380 - }, - { - "epoch": 0.38358174649481075, - "grad_norm": 3.248068332672119, - "learning_rate": 4.360697089175316e-05, - "loss": 0.6189, - "step": 43390 - }, - { - "epoch": 0.383670149755123, - "grad_norm": 9.352893829345703, - "learning_rate": 4.360549750408129e-05, - "loss": 0.7816, - "step": 43400 - }, - { - "epoch": 0.3837585530154352, - "grad_norm": 3.005835771560669, - "learning_rate": 4.3604024116409415e-05, - "loss": 0.8267, - "step": 43410 - }, - { - "epoch": 0.38384695627574744, - "grad_norm": 2.432826042175293, - "learning_rate": 4.3602550728737544e-05, - "loss": 0.7032, - "step": 43420 - }, - { - "epoch": 0.3839353595360597, - "grad_norm": 2.6174306869506836, - "learning_rate": 4.360107734106567e-05, - "loss": 0.7607, - "step": 43430 - }, - { - "epoch": 0.3840237627963719, - "grad_norm": 2.715211868286133, - "learning_rate": 4.35996039533938e-05, - "loss": 0.7467, - "step": 43440 - }, - { - "epoch": 0.3841121660566842, - "grad_norm": 10.05677318572998, - "learning_rate": 4.3598130565721936e-05, - "loss": 0.7146, - "step": 43450 - }, - { - "epoch": 0.3842005693169964, - "grad_norm": 2.7749428749084473, - "learning_rate": 4.359665717805006e-05, - "loss": 0.7512, - "step": 43460 - }, - { - "epoch": 0.38428897257730865, - "grad_norm": 4.629761219024658, - "learning_rate": 4.359518379037819e-05, - "loss": 0.7493, - "step": 43470 - }, - { - "epoch": 0.3843773758376209, - "grad_norm": 3.2584049701690674, - "learning_rate": 4.359371040270632e-05, - "loss": 0.6809, - "step": 43480 - }, - { - "epoch": 0.3844657790979331, - "grad_norm": 4.336820125579834, - "learning_rate": 4.359223701503445e-05, - "loss": 0.731, - "step": 43490 - }, - { - "epoch": 0.38455418235824534, - "grad_norm": 5.319371700286865, - "learning_rate": 4.359076362736258e-05, - "loss": 0.675, - "step": 43500 - }, - { - "epoch": 0.38464258561855763, - "grad_norm": 3.8213205337524414, - "learning_rate": 4.358929023969071e-05, - "loss": 0.7017, - "step": 43510 - }, - { - "epoch": 0.38473098887886986, - "grad_norm": 3.5950369834899902, - "learning_rate": 4.3587816852018834e-05, - "loss": 0.7241, - "step": 43520 - }, - { - "epoch": 0.3848193921391821, - "grad_norm": 4.100440502166748, - "learning_rate": 4.358634346434697e-05, - "loss": 0.6119, - "step": 43530 - }, - { - "epoch": 0.3849077953994943, - "grad_norm": 4.950203895568848, - "learning_rate": 4.358487007667509e-05, - "loss": 0.6241, - "step": 43540 - }, - { - "epoch": 0.38499619865980655, - "grad_norm": 2.266775369644165, - "learning_rate": 4.3583396689003226e-05, - "loss": 0.6648, - "step": 43550 - }, - { - "epoch": 0.38508460192011884, - "grad_norm": 3.357187032699585, - "learning_rate": 4.3581923301331354e-05, - "loss": 0.6899, - "step": 43560 - }, - { - "epoch": 0.38517300518043107, - "grad_norm": 3.2378625869750977, - "learning_rate": 4.358044991365948e-05, - "loss": 0.7301, - "step": 43570 - }, - { - "epoch": 0.3852614084407433, - "grad_norm": 3.4043920040130615, - "learning_rate": 4.357897652598761e-05, - "loss": 0.6919, - "step": 43580 - }, - { - "epoch": 0.38534981170105553, - "grad_norm": 9.793386459350586, - "learning_rate": 4.3577503138315746e-05, - "loss": 0.7642, - "step": 43590 - }, - { - "epoch": 0.38543821496136776, - "grad_norm": 4.7564311027526855, - "learning_rate": 4.357602975064387e-05, - "loss": 0.7277, - "step": 43600 - }, - { - "epoch": 0.38552661822168, - "grad_norm": 10.041168212890625, - "learning_rate": 4.3574556362972e-05, - "loss": 0.6197, - "step": 43610 - }, - { - "epoch": 0.3856150214819923, - "grad_norm": 2.1328935623168945, - "learning_rate": 4.357308297530013e-05, - "loss": 0.723, - "step": 43620 - }, - { - "epoch": 0.3857034247423045, - "grad_norm": 4.792178630828857, - "learning_rate": 4.357160958762826e-05, - "loss": 0.7592, - "step": 43630 - }, - { - "epoch": 0.38579182800261674, - "grad_norm": 7.255916595458984, - "learning_rate": 4.357013619995639e-05, - "loss": 0.7721, - "step": 43640 - }, - { - "epoch": 0.385880231262929, - "grad_norm": 2.069598436355591, - "learning_rate": 4.3568662812284516e-05, - "loss": 0.7243, - "step": 43650 - }, - { - "epoch": 0.3859686345232412, - "grad_norm": 4.392913818359375, - "learning_rate": 4.3567189424612645e-05, - "loss": 0.7054, - "step": 43660 - }, - { - "epoch": 0.38605703778355344, - "grad_norm": 14.540793418884277, - "learning_rate": 4.356571603694078e-05, - "loss": 0.6862, - "step": 43670 - }, - { - "epoch": 0.3861454410438657, - "grad_norm": 8.115978240966797, - "learning_rate": 4.356424264926891e-05, - "loss": 0.7095, - "step": 43680 - }, - { - "epoch": 0.38623384430417795, - "grad_norm": 5.372908592224121, - "learning_rate": 4.3562769261597036e-05, - "loss": 0.6752, - "step": 43690 - }, - { - "epoch": 0.3863222475644902, - "grad_norm": 2.51652455329895, - "learning_rate": 4.3561295873925165e-05, - "loss": 0.7102, - "step": 43700 - }, - { - "epoch": 0.3864106508248024, - "grad_norm": 3.7599122524261475, - "learning_rate": 4.355982248625329e-05, - "loss": 0.6836, - "step": 43710 - }, - { - "epoch": 0.38649905408511465, - "grad_norm": 6.177261829376221, - "learning_rate": 4.355834909858142e-05, - "loss": 0.792, - "step": 43720 - }, - { - "epoch": 0.3865874573454269, - "grad_norm": 4.799642086029053, - "learning_rate": 4.3556875710909557e-05, - "loss": 0.6688, - "step": 43730 - }, - { - "epoch": 0.38667586060573916, - "grad_norm": 2.5385894775390625, - "learning_rate": 4.3555402323237685e-05, - "loss": 0.6201, - "step": 43740 - }, - { - "epoch": 0.3867642638660514, - "grad_norm": 1.4727774858474731, - "learning_rate": 4.355392893556581e-05, - "loss": 0.6661, - "step": 43750 - }, - { - "epoch": 0.3868526671263636, - "grad_norm": 4.139338970184326, - "learning_rate": 4.355245554789394e-05, - "loss": 0.6879, - "step": 43760 - }, - { - "epoch": 0.38694107038667586, - "grad_norm": 3.651340961456299, - "learning_rate": 4.355098216022207e-05, - "loss": 0.74, - "step": 43770 - }, - { - "epoch": 0.3870294736469881, - "grad_norm": 4.973133563995361, - "learning_rate": 4.35495087725502e-05, - "loss": 0.6589, - "step": 43780 - }, - { - "epoch": 0.3871178769073003, - "grad_norm": 9.292289733886719, - "learning_rate": 4.354803538487833e-05, - "loss": 0.7245, - "step": 43790 - }, - { - "epoch": 0.3872062801676126, - "grad_norm": 2.549406051635742, - "learning_rate": 4.354656199720646e-05, - "loss": 0.7695, - "step": 43800 - }, - { - "epoch": 0.38729468342792484, - "grad_norm": 7.283100128173828, - "learning_rate": 4.354508860953459e-05, - "loss": 0.6357, - "step": 43810 - }, - { - "epoch": 0.38738308668823707, - "grad_norm": 7.145496845245361, - "learning_rate": 4.354361522186272e-05, - "loss": 0.6583, - "step": 43820 - }, - { - "epoch": 0.3874714899485493, - "grad_norm": 3.2963976860046387, - "learning_rate": 4.354214183419085e-05, - "loss": 0.712, - "step": 43830 - }, - { - "epoch": 0.38755989320886153, - "grad_norm": 9.30205249786377, - "learning_rate": 4.3540668446518975e-05, - "loss": 0.749, - "step": 43840 - }, - { - "epoch": 0.38764829646917376, - "grad_norm": 4.466895580291748, - "learning_rate": 4.3539195058847104e-05, - "loss": 0.7446, - "step": 43850 - }, - { - "epoch": 0.38773669972948605, - "grad_norm": 4.649153232574463, - "learning_rate": 4.353772167117524e-05, - "loss": 0.7192, - "step": 43860 - }, - { - "epoch": 0.3878251029897983, - "grad_norm": 3.718433380126953, - "learning_rate": 4.353624828350337e-05, - "loss": 0.7486, - "step": 43870 - }, - { - "epoch": 0.3879135062501105, - "grad_norm": 3.3186070919036865, - "learning_rate": 4.3534774895831495e-05, - "loss": 0.7655, - "step": 43880 - }, - { - "epoch": 0.38800190951042274, - "grad_norm": 6.101158142089844, - "learning_rate": 4.3533301508159624e-05, - "loss": 0.6282, - "step": 43890 - }, - { - "epoch": 0.38809031277073497, - "grad_norm": 7.869748115539551, - "learning_rate": 4.353182812048775e-05, - "loss": 0.7269, - "step": 43900 - }, - { - "epoch": 0.3881787160310472, - "grad_norm": 4.291072368621826, - "learning_rate": 4.353035473281588e-05, - "loss": 0.7121, - "step": 43910 - }, - { - "epoch": 0.3882671192913595, - "grad_norm": 2.806745767593384, - "learning_rate": 4.3528881345144016e-05, - "loss": 0.7336, - "step": 43920 - }, - { - "epoch": 0.3883555225516717, - "grad_norm": 1.7958487272262573, - "learning_rate": 4.352740795747214e-05, - "loss": 0.7401, - "step": 43930 - }, - { - "epoch": 0.38844392581198395, - "grad_norm": 2.4116592407226562, - "learning_rate": 4.352593456980027e-05, - "loss": 0.8089, - "step": 43940 - }, - { - "epoch": 0.3885323290722962, - "grad_norm": 2.7241897583007812, - "learning_rate": 4.35244611821284e-05, - "loss": 0.6696, - "step": 43950 - }, - { - "epoch": 0.3886207323326084, - "grad_norm": 3.5028350353240967, - "learning_rate": 4.352298779445653e-05, - "loss": 0.6476, - "step": 43960 - }, - { - "epoch": 0.38870913559292064, - "grad_norm": 8.729159355163574, - "learning_rate": 4.352151440678466e-05, - "loss": 0.6532, - "step": 43970 - }, - { - "epoch": 0.3887975388532329, - "grad_norm": 7.070501804351807, - "learning_rate": 4.352004101911279e-05, - "loss": 0.4986, - "step": 43980 - }, - { - "epoch": 0.38888594211354516, - "grad_norm": 3.0011374950408936, - "learning_rate": 4.3518567631440914e-05, - "loss": 0.7366, - "step": 43990 - }, - { - "epoch": 0.3889743453738574, - "grad_norm": 10.321503639221191, - "learning_rate": 4.351709424376905e-05, - "loss": 0.7788, - "step": 44000 - }, - { - "epoch": 0.3890627486341696, - "grad_norm": 5.201939582824707, - "learning_rate": 4.351562085609717e-05, - "loss": 0.757, - "step": 44010 - }, - { - "epoch": 0.38915115189448185, - "grad_norm": 1.737633466720581, - "learning_rate": 4.3514147468425306e-05, - "loss": 0.6608, - "step": 44020 - }, - { - "epoch": 0.3892395551547941, - "grad_norm": 2.7050435543060303, - "learning_rate": 4.3512674080753434e-05, - "loss": 0.8404, - "step": 44030 - }, - { - "epoch": 0.38932795841510637, - "grad_norm": 2.749333381652832, - "learning_rate": 4.351120069308156e-05, - "loss": 0.7292, - "step": 44040 - }, - { - "epoch": 0.3894163616754186, - "grad_norm": 1.5947707891464233, - "learning_rate": 4.350972730540969e-05, - "loss": 0.6128, - "step": 44050 - }, - { - "epoch": 0.38950476493573083, - "grad_norm": 4.752467155456543, - "learning_rate": 4.3508253917737826e-05, - "loss": 0.6352, - "step": 44060 - }, - { - "epoch": 0.38959316819604306, - "grad_norm": 3.5451290607452393, - "learning_rate": 4.350678053006595e-05, - "loss": 0.7254, - "step": 44070 - }, - { - "epoch": 0.3896815714563553, - "grad_norm": 3.0532515048980713, - "learning_rate": 4.350530714239408e-05, - "loss": 0.6065, - "step": 44080 - }, - { - "epoch": 0.3897699747166676, - "grad_norm": 4.595867156982422, - "learning_rate": 4.350383375472221e-05, - "loss": 0.721, - "step": 44090 - }, - { - "epoch": 0.3898583779769798, - "grad_norm": 2.1476407051086426, - "learning_rate": 4.350236036705034e-05, - "loss": 0.789, - "step": 44100 - }, - { - "epoch": 0.38994678123729204, - "grad_norm": 1.92362642288208, - "learning_rate": 4.350088697937847e-05, - "loss": 0.6836, - "step": 44110 - }, - { - "epoch": 0.39003518449760427, - "grad_norm": 1.0036829710006714, - "learning_rate": 4.3499413591706596e-05, - "loss": 0.6622, - "step": 44120 - }, - { - "epoch": 0.3901235877579165, - "grad_norm": 5.729055881500244, - "learning_rate": 4.3497940204034725e-05, - "loss": 0.632, - "step": 44130 - }, - { - "epoch": 0.39021199101822873, - "grad_norm": 2.0600526332855225, - "learning_rate": 4.349646681636286e-05, - "loss": 0.6647, - "step": 44140 - }, - { - "epoch": 0.390300394278541, - "grad_norm": 2.524857759475708, - "learning_rate": 4.349499342869098e-05, - "loss": 0.6827, - "step": 44150 - }, - { - "epoch": 0.39038879753885325, - "grad_norm": 5.2838544845581055, - "learning_rate": 4.3493520041019116e-05, - "loss": 0.7239, - "step": 44160 - }, - { - "epoch": 0.3904772007991655, - "grad_norm": 5.721658706665039, - "learning_rate": 4.3492046653347245e-05, - "loss": 0.6816, - "step": 44170 - }, - { - "epoch": 0.3905656040594777, - "grad_norm": 7.646422386169434, - "learning_rate": 4.349057326567537e-05, - "loss": 0.7988, - "step": 44180 - }, - { - "epoch": 0.39065400731978994, - "grad_norm": 3.5777852535247803, - "learning_rate": 4.34890998780035e-05, - "loss": 0.7122, - "step": 44190 - }, - { - "epoch": 0.3907424105801022, - "grad_norm": 3.9402763843536377, - "learning_rate": 4.3487626490331637e-05, - "loss": 0.7637, - "step": 44200 - }, - { - "epoch": 0.39083081384041446, - "grad_norm": 6.075642108917236, - "learning_rate": 4.348615310265976e-05, - "loss": 0.8593, - "step": 44210 - }, - { - "epoch": 0.3909192171007267, - "grad_norm": 1.5269951820373535, - "learning_rate": 4.348467971498789e-05, - "loss": 0.6379, - "step": 44220 - }, - { - "epoch": 0.3910076203610389, - "grad_norm": 5.038540840148926, - "learning_rate": 4.348320632731602e-05, - "loss": 0.7545, - "step": 44230 - }, - { - "epoch": 0.39109602362135115, - "grad_norm": 4.454661846160889, - "learning_rate": 4.348173293964415e-05, - "loss": 0.6046, - "step": 44240 - }, - { - "epoch": 0.3911844268816634, - "grad_norm": 3.3446669578552246, - "learning_rate": 4.348025955197228e-05, - "loss": 0.7091, - "step": 44250 - }, - { - "epoch": 0.3912728301419756, - "grad_norm": 4.3299031257629395, - "learning_rate": 4.347878616430041e-05, - "loss": 0.7568, - "step": 44260 - }, - { - "epoch": 0.3913612334022879, - "grad_norm": 2.940662145614624, - "learning_rate": 4.3477312776628535e-05, - "loss": 0.8204, - "step": 44270 - }, - { - "epoch": 0.39144963666260013, - "grad_norm": 2.40651535987854, - "learning_rate": 4.347583938895667e-05, - "loss": 0.7732, - "step": 44280 - }, - { - "epoch": 0.39153803992291236, - "grad_norm": 3.4223484992980957, - "learning_rate": 4.347436600128479e-05, - "loss": 0.6626, - "step": 44290 - }, - { - "epoch": 0.3916264431832246, - "grad_norm": 2.267432928085327, - "learning_rate": 4.347289261361293e-05, - "loss": 0.6695, - "step": 44300 - }, - { - "epoch": 0.3917148464435368, - "grad_norm": 5.355408191680908, - "learning_rate": 4.3471419225941055e-05, - "loss": 0.8069, - "step": 44310 - }, - { - "epoch": 0.39180324970384905, - "grad_norm": 3.348864793777466, - "learning_rate": 4.3469945838269184e-05, - "loss": 0.8162, - "step": 44320 - }, - { - "epoch": 0.39189165296416134, - "grad_norm": 2.9551172256469727, - "learning_rate": 4.346847245059731e-05, - "loss": 0.651, - "step": 44330 - }, - { - "epoch": 0.39198005622447357, - "grad_norm": 6.741861343383789, - "learning_rate": 4.346699906292545e-05, - "loss": 0.6796, - "step": 44340 - }, - { - "epoch": 0.3920684594847858, - "grad_norm": 7.677488803863525, - "learning_rate": 4.346552567525357e-05, - "loss": 0.8011, - "step": 44350 - }, - { - "epoch": 0.39215686274509803, - "grad_norm": 2.3081274032592773, - "learning_rate": 4.3464052287581704e-05, - "loss": 0.6788, - "step": 44360 - }, - { - "epoch": 0.39224526600541026, - "grad_norm": 2.5753400325775146, - "learning_rate": 4.3462578899909825e-05, - "loss": 0.8173, - "step": 44370 - }, - { - "epoch": 0.3923336692657225, - "grad_norm": 3.5336172580718994, - "learning_rate": 4.346110551223796e-05, - "loss": 0.6856, - "step": 44380 - }, - { - "epoch": 0.3924220725260348, - "grad_norm": 4.361479759216309, - "learning_rate": 4.345963212456609e-05, - "loss": 0.685, - "step": 44390 - }, - { - "epoch": 0.392510475786347, - "grad_norm": 1.9852592945098877, - "learning_rate": 4.345815873689422e-05, - "loss": 0.6808, - "step": 44400 - }, - { - "epoch": 0.39259887904665924, - "grad_norm": 1.8553427457809448, - "learning_rate": 4.3456685349222346e-05, - "loss": 0.7773, - "step": 44410 - }, - { - "epoch": 0.3926872823069715, - "grad_norm": 1.07711923122406, - "learning_rate": 4.345521196155048e-05, - "loss": 0.617, - "step": 44420 - }, - { - "epoch": 0.3927756855672837, - "grad_norm": 7.83463191986084, - "learning_rate": 4.34537385738786e-05, - "loss": 0.7796, - "step": 44430 - }, - { - "epoch": 0.39286408882759594, - "grad_norm": 2.078061819076538, - "learning_rate": 4.345226518620674e-05, - "loss": 0.6024, - "step": 44440 - }, - { - "epoch": 0.3929524920879082, - "grad_norm": 13.942878723144531, - "learning_rate": 4.3450791798534866e-05, - "loss": 0.7386, - "step": 44450 - }, - { - "epoch": 0.39304089534822045, - "grad_norm": 2.6606757640838623, - "learning_rate": 4.3449318410862994e-05, - "loss": 0.7623, - "step": 44460 - }, - { - "epoch": 0.3931292986085327, - "grad_norm": 5.481786727905273, - "learning_rate": 4.344784502319112e-05, - "loss": 0.6791, - "step": 44470 - }, - { - "epoch": 0.3932177018688449, - "grad_norm": 4.303702354431152, - "learning_rate": 4.344637163551925e-05, - "loss": 0.5947, - "step": 44480 - }, - { - "epoch": 0.39330610512915715, - "grad_norm": 4.576141834259033, - "learning_rate": 4.344489824784738e-05, - "loss": 0.802, - "step": 44490 - }, - { - "epoch": 0.3933945083894694, - "grad_norm": 4.796335220336914, - "learning_rate": 4.3443424860175514e-05, - "loss": 0.7512, - "step": 44500 - }, - { - "epoch": 0.39348291164978166, - "grad_norm": 3.4886820316314697, - "learning_rate": 4.3441951472503636e-05, - "loss": 0.7189, - "step": 44510 - }, - { - "epoch": 0.3935713149100939, - "grad_norm": 3.4879636764526367, - "learning_rate": 4.344047808483177e-05, - "loss": 0.6584, - "step": 44520 - }, - { - "epoch": 0.3936597181704061, - "grad_norm": 7.3791303634643555, - "learning_rate": 4.34390046971599e-05, - "loss": 0.716, - "step": 44530 - }, - { - "epoch": 0.39374812143071836, - "grad_norm": 1.2782517671585083, - "learning_rate": 4.343753130948803e-05, - "loss": 0.6979, - "step": 44540 - }, - { - "epoch": 0.3938365246910306, - "grad_norm": 3.2520925998687744, - "learning_rate": 4.3436057921816156e-05, - "loss": 0.6334, - "step": 44550 - }, - { - "epoch": 0.3939249279513428, - "grad_norm": 3.488539218902588, - "learning_rate": 4.343458453414429e-05, - "loss": 0.7581, - "step": 44560 - }, - { - "epoch": 0.3940133312116551, - "grad_norm": 2.7524282932281494, - "learning_rate": 4.343311114647241e-05, - "loss": 0.7155, - "step": 44570 - }, - { - "epoch": 0.39410173447196734, - "grad_norm": 5.6065144538879395, - "learning_rate": 4.343163775880055e-05, - "loss": 0.8001, - "step": 44580 - }, - { - "epoch": 0.39419013773227957, - "grad_norm": 1.8697189092636108, - "learning_rate": 4.3430164371128676e-05, - "loss": 0.699, - "step": 44590 - }, - { - "epoch": 0.3942785409925918, - "grad_norm": 5.810731887817383, - "learning_rate": 4.3428690983456805e-05, - "loss": 0.73, - "step": 44600 - }, - { - "epoch": 0.39436694425290403, - "grad_norm": 9.312521934509277, - "learning_rate": 4.342721759578493e-05, - "loss": 0.688, - "step": 44610 - }, - { - "epoch": 0.3944553475132163, - "grad_norm": 2.037626266479492, - "learning_rate": 4.342574420811306e-05, - "loss": 0.823, - "step": 44620 - }, - { - "epoch": 0.39454375077352855, - "grad_norm": 1.7342125177383423, - "learning_rate": 4.342427082044119e-05, - "loss": 0.8827, - "step": 44630 - }, - { - "epoch": 0.3946321540338408, - "grad_norm": 1.786419153213501, - "learning_rate": 4.3422797432769325e-05, - "loss": 0.6922, - "step": 44640 - }, - { - "epoch": 0.394720557294153, - "grad_norm": 2.1118435859680176, - "learning_rate": 4.342132404509745e-05, - "loss": 0.6553, - "step": 44650 - }, - { - "epoch": 0.39480896055446524, - "grad_norm": 3.9189305305480957, - "learning_rate": 4.341985065742558e-05, - "loss": 0.7709, - "step": 44660 - }, - { - "epoch": 0.39489736381477747, - "grad_norm": 5.795950412750244, - "learning_rate": 4.341837726975371e-05, - "loss": 0.7495, - "step": 44670 - }, - { - "epoch": 0.39498576707508976, - "grad_norm": 4.017739295959473, - "learning_rate": 4.341690388208184e-05, - "loss": 0.8069, - "step": 44680 - }, - { - "epoch": 0.395074170335402, - "grad_norm": 4.883296012878418, - "learning_rate": 4.3415430494409967e-05, - "loss": 0.7928, - "step": 44690 - }, - { - "epoch": 0.3951625735957142, - "grad_norm": 6.279229640960693, - "learning_rate": 4.34139571067381e-05, - "loss": 0.7267, - "step": 44700 - }, - { - "epoch": 0.39525097685602645, - "grad_norm": 5.529674530029297, - "learning_rate": 4.341248371906623e-05, - "loss": 0.7277, - "step": 44710 - }, - { - "epoch": 0.3953393801163387, - "grad_norm": 8.532251358032227, - "learning_rate": 4.341101033139436e-05, - "loss": 0.8273, - "step": 44720 - }, - { - "epoch": 0.3954277833766509, - "grad_norm": 2.29500150680542, - "learning_rate": 4.340953694372249e-05, - "loss": 0.7567, - "step": 44730 - }, - { - "epoch": 0.3955161866369632, - "grad_norm": 3.6768202781677246, - "learning_rate": 4.3408063556050615e-05, - "loss": 0.8448, - "step": 44740 - }, - { - "epoch": 0.3956045898972754, - "grad_norm": 3.7816247940063477, - "learning_rate": 4.3406590168378744e-05, - "loss": 0.7814, - "step": 44750 - }, - { - "epoch": 0.39569299315758766, - "grad_norm": 4.607700824737549, - "learning_rate": 4.340511678070687e-05, - "loss": 0.8145, - "step": 44760 - }, - { - "epoch": 0.3957813964178999, - "grad_norm": 3.395230770111084, - "learning_rate": 4.340364339303501e-05, - "loss": 0.8027, - "step": 44770 - }, - { - "epoch": 0.3958697996782121, - "grad_norm": 4.820852756500244, - "learning_rate": 4.3402170005363135e-05, - "loss": 0.7184, - "step": 44780 - }, - { - "epoch": 0.39595820293852435, - "grad_norm": 8.004124641418457, - "learning_rate": 4.3400696617691264e-05, - "loss": 0.7167, - "step": 44790 - }, - { - "epoch": 0.39604660619883664, - "grad_norm": 3.0855867862701416, - "learning_rate": 4.339922323001939e-05, - "loss": 0.7763, - "step": 44800 - }, - { - "epoch": 0.39613500945914887, - "grad_norm": 3.385728120803833, - "learning_rate": 4.339774984234752e-05, - "loss": 0.8062, - "step": 44810 - }, - { - "epoch": 0.3962234127194611, - "grad_norm": 6.4531426429748535, - "learning_rate": 4.339627645467565e-05, - "loss": 0.8392, - "step": 44820 - }, - { - "epoch": 0.39631181597977333, - "grad_norm": 2.572552442550659, - "learning_rate": 4.3394803067003784e-05, - "loss": 0.7051, - "step": 44830 - }, - { - "epoch": 0.39640021924008556, - "grad_norm": 3.2080845832824707, - "learning_rate": 4.3393329679331905e-05, - "loss": 0.7502, - "step": 44840 - }, - { - "epoch": 0.3964886225003978, - "grad_norm": 1.4954874515533447, - "learning_rate": 4.339185629166004e-05, - "loss": 0.7671, - "step": 44850 - }, - { - "epoch": 0.3965770257607101, - "grad_norm": 2.5237584114074707, - "learning_rate": 4.339038290398817e-05, - "loss": 0.6499, - "step": 44860 - }, - { - "epoch": 0.3966654290210223, - "grad_norm": 3.3950321674346924, - "learning_rate": 4.33889095163163e-05, - "loss": 0.7518, - "step": 44870 - }, - { - "epoch": 0.39675383228133454, - "grad_norm": 6.780747413635254, - "learning_rate": 4.3387436128644426e-05, - "loss": 0.767, - "step": 44880 - }, - { - "epoch": 0.39684223554164677, - "grad_norm": 4.4082722663879395, - "learning_rate": 4.338596274097256e-05, - "loss": 0.7036, - "step": 44890 - }, - { - "epoch": 0.396930638801959, - "grad_norm": 1.5104519128799438, - "learning_rate": 4.338448935330068e-05, - "loss": 0.7855, - "step": 44900 - }, - { - "epoch": 0.39701904206227123, - "grad_norm": 3.7163853645324707, - "learning_rate": 4.338301596562882e-05, - "loss": 0.6486, - "step": 44910 - }, - { - "epoch": 0.3971074453225835, - "grad_norm": 6.42086124420166, - "learning_rate": 4.3381542577956946e-05, - "loss": 0.7757, - "step": 44920 - }, - { - "epoch": 0.39719584858289575, - "grad_norm": 3.1415579319000244, - "learning_rate": 4.3380069190285074e-05, - "loss": 0.7589, - "step": 44930 - }, - { - "epoch": 0.397284251843208, - "grad_norm": 7.206704139709473, - "learning_rate": 4.33785958026132e-05, - "loss": 0.5722, - "step": 44940 - }, - { - "epoch": 0.3973726551035202, - "grad_norm": 9.921045303344727, - "learning_rate": 4.337712241494133e-05, - "loss": 0.6701, - "step": 44950 - }, - { - "epoch": 0.39746105836383244, - "grad_norm": 3.760974168777466, - "learning_rate": 4.337564902726946e-05, - "loss": 0.6208, - "step": 44960 - }, - { - "epoch": 0.3975494616241447, - "grad_norm": 2.382204294204712, - "learning_rate": 4.3374175639597594e-05, - "loss": 0.7419, - "step": 44970 - }, - { - "epoch": 0.39763786488445696, - "grad_norm": 1.4210044145584106, - "learning_rate": 4.3372702251925716e-05, - "loss": 0.7163, - "step": 44980 - }, - { - "epoch": 0.3977262681447692, - "grad_norm": 4.760910511016846, - "learning_rate": 4.337122886425385e-05, - "loss": 0.7925, - "step": 44990 - }, - { - "epoch": 0.3978146714050814, - "grad_norm": 2.803283214569092, - "learning_rate": 4.336975547658198e-05, - "loss": 0.7196, - "step": 45000 - }, - { - "epoch": 0.39790307466539365, - "grad_norm": 2.0723612308502197, - "learning_rate": 4.336828208891011e-05, - "loss": 0.6277, - "step": 45010 - }, - { - "epoch": 0.3979914779257059, - "grad_norm": 4.410577297210693, - "learning_rate": 4.3366808701238236e-05, - "loss": 0.7849, - "step": 45020 - }, - { - "epoch": 0.3980798811860181, - "grad_norm": 3.897620916366577, - "learning_rate": 4.336533531356637e-05, - "loss": 0.7683, - "step": 45030 - }, - { - "epoch": 0.3981682844463304, - "grad_norm": 1.4067083597183228, - "learning_rate": 4.336386192589449e-05, - "loss": 0.8945, - "step": 45040 - }, - { - "epoch": 0.39825668770664263, - "grad_norm": 2.771552801132202, - "learning_rate": 4.336238853822263e-05, - "loss": 0.6967, - "step": 45050 - }, - { - "epoch": 0.39834509096695486, - "grad_norm": 5.584054470062256, - "learning_rate": 4.3360915150550756e-05, - "loss": 0.628, - "step": 45060 - }, - { - "epoch": 0.3984334942272671, - "grad_norm": 3.6713294982910156, - "learning_rate": 4.3359441762878885e-05, - "loss": 0.6827, - "step": 45070 - }, - { - "epoch": 0.3985218974875793, - "grad_norm": 2.9452860355377197, - "learning_rate": 4.335796837520701e-05, - "loss": 0.6416, - "step": 45080 - }, - { - "epoch": 0.39861030074789155, - "grad_norm": 1.9640501737594604, - "learning_rate": 4.335649498753514e-05, - "loss": 0.7386, - "step": 45090 - }, - { - "epoch": 0.39869870400820384, - "grad_norm": 3.809224843978882, - "learning_rate": 4.335502159986327e-05, - "loss": 0.7708, - "step": 45100 - }, - { - "epoch": 0.3987871072685161, - "grad_norm": 2.6763768196105957, - "learning_rate": 4.3353548212191405e-05, - "loss": 0.7598, - "step": 45110 - }, - { - "epoch": 0.3988755105288283, - "grad_norm": 2.7607669830322266, - "learning_rate": 4.3352074824519526e-05, - "loss": 0.7248, - "step": 45120 - }, - { - "epoch": 0.39896391378914053, - "grad_norm": 1.9777405261993408, - "learning_rate": 4.335060143684766e-05, - "loss": 0.7871, - "step": 45130 - }, - { - "epoch": 0.39905231704945276, - "grad_norm": 5.079465389251709, - "learning_rate": 4.334912804917579e-05, - "loss": 0.7279, - "step": 45140 - }, - { - "epoch": 0.39914072030976505, - "grad_norm": 7.746766567230225, - "learning_rate": 4.334765466150392e-05, - "loss": 0.7292, - "step": 45150 - }, - { - "epoch": 0.3992291235700773, - "grad_norm": 5.309417724609375, - "learning_rate": 4.334618127383205e-05, - "loss": 0.9228, - "step": 45160 - }, - { - "epoch": 0.3993175268303895, - "grad_norm": 3.4967904090881348, - "learning_rate": 4.334470788616018e-05, - "loss": 0.7025, - "step": 45170 - }, - { - "epoch": 0.39940593009070174, - "grad_norm": 8.043968200683594, - "learning_rate": 4.33432344984883e-05, - "loss": 0.6382, - "step": 45180 - }, - { - "epoch": 0.399494333351014, - "grad_norm": 4.333873271942139, - "learning_rate": 4.334176111081644e-05, - "loss": 0.7109, - "step": 45190 - }, - { - "epoch": 0.3995827366113262, - "grad_norm": 3.7081408500671387, - "learning_rate": 4.334028772314456e-05, - "loss": 0.7677, - "step": 45200 - }, - { - "epoch": 0.3996711398716385, - "grad_norm": 6.2343668937683105, - "learning_rate": 4.3338814335472695e-05, - "loss": 0.7573, - "step": 45210 - }, - { - "epoch": 0.3997595431319507, - "grad_norm": 3.258674383163452, - "learning_rate": 4.3337340947800824e-05, - "loss": 0.7545, - "step": 45220 - }, - { - "epoch": 0.39984794639226295, - "grad_norm": 2.7142248153686523, - "learning_rate": 4.333586756012895e-05, - "loss": 0.7248, - "step": 45230 - }, - { - "epoch": 0.3999363496525752, - "grad_norm": 4.933760643005371, - "learning_rate": 4.333439417245708e-05, - "loss": 0.6933, - "step": 45240 - }, - { - "epoch": 0.4000247529128874, - "grad_norm": 4.056682109832764, - "learning_rate": 4.3332920784785215e-05, - "loss": 0.661, - "step": 45250 - }, - { - "epoch": 0.40011315617319965, - "grad_norm": 2.1073946952819824, - "learning_rate": 4.333144739711334e-05, - "loss": 0.8888, - "step": 45260 - }, - { - "epoch": 0.40020155943351193, - "grad_norm": 2.877234935760498, - "learning_rate": 4.332997400944147e-05, - "loss": 0.8523, - "step": 45270 - }, - { - "epoch": 0.40028996269382416, - "grad_norm": 3.1276321411132812, - "learning_rate": 4.33285006217696e-05, - "loss": 0.8019, - "step": 45280 - }, - { - "epoch": 0.4003783659541364, - "grad_norm": 6.948557376861572, - "learning_rate": 4.332702723409773e-05, - "loss": 0.623, - "step": 45290 - }, - { - "epoch": 0.4004667692144486, - "grad_norm": 4.216034889221191, - "learning_rate": 4.332555384642586e-05, - "loss": 0.7965, - "step": 45300 - }, - { - "epoch": 0.40055517247476086, - "grad_norm": 4.658138751983643, - "learning_rate": 4.3324080458753986e-05, - "loss": 0.7569, - "step": 45310 - }, - { - "epoch": 0.4006435757350731, - "grad_norm": 2.5574252605438232, - "learning_rate": 4.3322607071082114e-05, - "loss": 0.7729, - "step": 45320 - }, - { - "epoch": 0.4007319789953854, - "grad_norm": 3.4924285411834717, - "learning_rate": 4.332113368341025e-05, - "loss": 0.6704, - "step": 45330 - }, - { - "epoch": 0.4008203822556976, - "grad_norm": 5.622508525848389, - "learning_rate": 4.331966029573837e-05, - "loss": 0.6332, - "step": 45340 - }, - { - "epoch": 0.40090878551600984, - "grad_norm": 12.408244132995605, - "learning_rate": 4.3318186908066506e-05, - "loss": 0.7196, - "step": 45350 - }, - { - "epoch": 0.40099718877632207, - "grad_norm": 3.5734784603118896, - "learning_rate": 4.3316713520394634e-05, - "loss": 0.8647, - "step": 45360 - }, - { - "epoch": 0.4010855920366343, - "grad_norm": 8.04663372039795, - "learning_rate": 4.331524013272276e-05, - "loss": 0.7479, - "step": 45370 - }, - { - "epoch": 0.40117399529694653, - "grad_norm": 4.209608554840088, - "learning_rate": 4.331376674505089e-05, - "loss": 0.8009, - "step": 45380 - }, - { - "epoch": 0.4012623985572588, - "grad_norm": 2.2174675464630127, - "learning_rate": 4.3312293357379026e-05, - "loss": 0.7681, - "step": 45390 - }, - { - "epoch": 0.40135080181757105, - "grad_norm": 3.37431001663208, - "learning_rate": 4.331081996970715e-05, - "loss": 0.772, - "step": 45400 - }, - { - "epoch": 0.4014392050778833, - "grad_norm": 3.809494733810425, - "learning_rate": 4.330934658203528e-05, - "loss": 0.6842, - "step": 45410 - }, - { - "epoch": 0.4015276083381955, - "grad_norm": 4.1557087898254395, - "learning_rate": 4.3307873194363404e-05, - "loss": 0.8625, - "step": 45420 - }, - { - "epoch": 0.40161601159850774, - "grad_norm": 5.699997425079346, - "learning_rate": 4.330639980669154e-05, - "loss": 0.7146, - "step": 45430 - }, - { - "epoch": 0.40170441485881997, - "grad_norm": 3.7727091312408447, - "learning_rate": 4.330492641901967e-05, - "loss": 0.7158, - "step": 45440 - }, - { - "epoch": 0.40179281811913226, - "grad_norm": 13.562322616577148, - "learning_rate": 4.3303453031347796e-05, - "loss": 0.728, - "step": 45450 - }, - { - "epoch": 0.4018812213794445, - "grad_norm": 8.282513618469238, - "learning_rate": 4.3301979643675924e-05, - "loss": 0.6925, - "step": 45460 - }, - { - "epoch": 0.4019696246397567, - "grad_norm": 3.493535041809082, - "learning_rate": 4.330050625600406e-05, - "loss": 0.7821, - "step": 45470 - }, - { - "epoch": 0.40205802790006895, - "grad_norm": 2.2107646465301514, - "learning_rate": 4.329903286833218e-05, - "loss": 0.7237, - "step": 45480 - }, - { - "epoch": 0.4021464311603812, - "grad_norm": 3.566540241241455, - "learning_rate": 4.3297559480660316e-05, - "loss": 0.7904, - "step": 45490 - }, - { - "epoch": 0.4022348344206934, - "grad_norm": 4.750785827636719, - "learning_rate": 4.3296086092988445e-05, - "loss": 0.7075, - "step": 45500 - }, - { - "epoch": 0.4023232376810057, - "grad_norm": 4.600589752197266, - "learning_rate": 4.329461270531657e-05, - "loss": 0.6793, - "step": 45510 - }, - { - "epoch": 0.4024116409413179, - "grad_norm": 2.5680198669433594, - "learning_rate": 4.32931393176447e-05, - "loss": 0.7164, - "step": 45520 - }, - { - "epoch": 0.40250004420163016, - "grad_norm": 1.9664194583892822, - "learning_rate": 4.3291665929972836e-05, - "loss": 0.7052, - "step": 45530 - }, - { - "epoch": 0.4025884474619424, - "grad_norm": 3.885457754135132, - "learning_rate": 4.329019254230096e-05, - "loss": 0.6824, - "step": 45540 - }, - { - "epoch": 0.4026768507222546, - "grad_norm": 1.6770765781402588, - "learning_rate": 4.328871915462909e-05, - "loss": 0.6608, - "step": 45550 - }, - { - "epoch": 0.40276525398256685, - "grad_norm": 2.748335838317871, - "learning_rate": 4.328724576695722e-05, - "loss": 0.6492, - "step": 45560 - }, - { - "epoch": 0.40285365724287914, - "grad_norm": 2.647151470184326, - "learning_rate": 4.328577237928535e-05, - "loss": 0.6563, - "step": 45570 - }, - { - "epoch": 0.40294206050319137, - "grad_norm": 5.991791725158691, - "learning_rate": 4.328429899161348e-05, - "loss": 0.6322, - "step": 45580 - }, - { - "epoch": 0.4030304637635036, - "grad_norm": 8.063817024230957, - "learning_rate": 4.3282825603941607e-05, - "loss": 0.8245, - "step": 45590 - }, - { - "epoch": 0.40311886702381583, - "grad_norm": 3.5337412357330322, - "learning_rate": 4.3281352216269735e-05, - "loss": 0.8835, - "step": 45600 - }, - { - "epoch": 0.40320727028412806, - "grad_norm": 7.444693088531494, - "learning_rate": 4.327987882859787e-05, - "loss": 0.6971, - "step": 45610 - }, - { - "epoch": 0.4032956735444403, - "grad_norm": 3.863515853881836, - "learning_rate": 4.3278405440926e-05, - "loss": 0.7218, - "step": 45620 - }, - { - "epoch": 0.4033840768047526, - "grad_norm": 7.443853378295898, - "learning_rate": 4.327693205325413e-05, - "loss": 0.74, - "step": 45630 - }, - { - "epoch": 0.4034724800650648, - "grad_norm": 3.8616251945495605, - "learning_rate": 4.3275458665582255e-05, - "loss": 0.7282, - "step": 45640 - }, - { - "epoch": 0.40356088332537704, - "grad_norm": 2.6907570362091064, - "learning_rate": 4.3273985277910383e-05, - "loss": 0.6497, - "step": 45650 - }, - { - "epoch": 0.40364928658568927, - "grad_norm": 2.6877543926239014, - "learning_rate": 4.327251189023851e-05, - "loss": 0.6957, - "step": 45660 - }, - { - "epoch": 0.4037376898460015, - "grad_norm": 2.5589005947113037, - "learning_rate": 4.327103850256664e-05, - "loss": 0.6228, - "step": 45670 - }, - { - "epoch": 0.4038260931063138, - "grad_norm": 3.4897139072418213, - "learning_rate": 4.3269565114894775e-05, - "loss": 0.6918, - "step": 45680 - }, - { - "epoch": 0.403914496366626, - "grad_norm": 7.576402187347412, - "learning_rate": 4.3268091727222904e-05, - "loss": 0.6894, - "step": 45690 - }, - { - "epoch": 0.40400289962693825, - "grad_norm": 3.203425407409668, - "learning_rate": 4.326661833955103e-05, - "loss": 0.8509, - "step": 45700 - }, - { - "epoch": 0.4040913028872505, - "grad_norm": 6.4992876052856445, - "learning_rate": 4.326514495187916e-05, - "loss": 0.5883, - "step": 45710 - }, - { - "epoch": 0.4041797061475627, - "grad_norm": 4.828888893127441, - "learning_rate": 4.326367156420729e-05, - "loss": 0.731, - "step": 45720 - }, - { - "epoch": 0.40426810940787494, - "grad_norm": 7.019105911254883, - "learning_rate": 4.326219817653542e-05, - "loss": 0.7222, - "step": 45730 - }, - { - "epoch": 0.40435651266818723, - "grad_norm": 4.875137805938721, - "learning_rate": 4.326072478886355e-05, - "loss": 0.6626, - "step": 45740 - }, - { - "epoch": 0.40444491592849946, - "grad_norm": 6.4282941818237305, - "learning_rate": 4.325925140119168e-05, - "loss": 0.6805, - "step": 45750 - }, - { - "epoch": 0.4045333191888117, - "grad_norm": 5.351105213165283, - "learning_rate": 4.325777801351981e-05, - "loss": 0.6759, - "step": 45760 - }, - { - "epoch": 0.4046217224491239, - "grad_norm": 5.707307815551758, - "learning_rate": 4.325630462584794e-05, - "loss": 0.6961, - "step": 45770 - }, - { - "epoch": 0.40471012570943615, - "grad_norm": 13.36841869354248, - "learning_rate": 4.3254831238176066e-05, - "loss": 0.6734, - "step": 45780 - }, - { - "epoch": 0.4047985289697484, - "grad_norm": 2.540553569793701, - "learning_rate": 4.3253357850504194e-05, - "loss": 0.6704, - "step": 45790 - }, - { - "epoch": 0.40488693223006067, - "grad_norm": 1.2700477838516235, - "learning_rate": 4.325188446283233e-05, - "loss": 0.6393, - "step": 45800 - }, - { - "epoch": 0.4049753354903729, - "grad_norm": 7.109341621398926, - "learning_rate": 4.325041107516045e-05, - "loss": 0.6969, - "step": 45810 - }, - { - "epoch": 0.40506373875068513, - "grad_norm": 3.18410325050354, - "learning_rate": 4.3248937687488586e-05, - "loss": 0.603, - "step": 45820 - }, - { - "epoch": 0.40515214201099736, - "grad_norm": 3.89701771736145, - "learning_rate": 4.3247464299816714e-05, - "loss": 0.758, - "step": 45830 - }, - { - "epoch": 0.4052405452713096, - "grad_norm": 6.603641986846924, - "learning_rate": 4.324599091214484e-05, - "loss": 0.7111, - "step": 45840 - }, - { - "epoch": 0.4053289485316218, - "grad_norm": 10.412839889526367, - "learning_rate": 4.324451752447297e-05, - "loss": 0.7406, - "step": 45850 - }, - { - "epoch": 0.4054173517919341, - "grad_norm": 3.6471614837646484, - "learning_rate": 4.3243044136801106e-05, - "loss": 0.7184, - "step": 45860 - }, - { - "epoch": 0.40550575505224634, - "grad_norm": 12.33071517944336, - "learning_rate": 4.324157074912923e-05, - "loss": 0.7196, - "step": 45870 - }, - { - "epoch": 0.4055941583125586, - "grad_norm": 4.292669296264648, - "learning_rate": 4.324009736145736e-05, - "loss": 0.7329, - "step": 45880 - }, - { - "epoch": 0.4056825615728708, - "grad_norm": 3.919067859649658, - "learning_rate": 4.3238623973785484e-05, - "loss": 0.7455, - "step": 45890 - }, - { - "epoch": 0.40577096483318303, - "grad_norm": 3.294001817703247, - "learning_rate": 4.323715058611362e-05, - "loss": 0.8079, - "step": 45900 - }, - { - "epoch": 0.40585936809349527, - "grad_norm": 1.6107417345046997, - "learning_rate": 4.323567719844175e-05, - "loss": 0.6853, - "step": 45910 - }, - { - "epoch": 0.40594777135380755, - "grad_norm": 12.0560884475708, - "learning_rate": 4.3234203810769876e-05, - "loss": 0.6912, - "step": 45920 - }, - { - "epoch": 0.4060361746141198, - "grad_norm": 1.6195980310440063, - "learning_rate": 4.3232730423098004e-05, - "loss": 0.6615, - "step": 45930 - }, - { - "epoch": 0.406124577874432, - "grad_norm": 5.623993873596191, - "learning_rate": 4.323125703542614e-05, - "loss": 0.7895, - "step": 45940 - }, - { - "epoch": 0.40621298113474424, - "grad_norm": 4.299506187438965, - "learning_rate": 4.322978364775426e-05, - "loss": 0.6948, - "step": 45950 - }, - { - "epoch": 0.4063013843950565, - "grad_norm": 2.0287930965423584, - "learning_rate": 4.3228310260082396e-05, - "loss": 0.7466, - "step": 45960 - }, - { - "epoch": 0.4063897876553687, - "grad_norm": 2.0097010135650635, - "learning_rate": 4.3226836872410525e-05, - "loss": 0.7427, - "step": 45970 - }, - { - "epoch": 0.406478190915681, - "grad_norm": 2.9460787773132324, - "learning_rate": 4.322536348473865e-05, - "loss": 0.6041, - "step": 45980 - }, - { - "epoch": 0.4065665941759932, - "grad_norm": 6.002807140350342, - "learning_rate": 4.322389009706678e-05, - "loss": 0.6147, - "step": 45990 - }, - { - "epoch": 0.40665499743630545, - "grad_norm": 1.2274689674377441, - "learning_rate": 4.3222416709394917e-05, - "loss": 0.6377, - "step": 46000 - }, - { - "epoch": 0.4067434006966177, - "grad_norm": 3.9840760231018066, - "learning_rate": 4.322094332172304e-05, - "loss": 0.7048, - "step": 46010 - }, - { - "epoch": 0.4068318039569299, - "grad_norm": 2.5341808795928955, - "learning_rate": 4.321946993405117e-05, - "loss": 0.703, - "step": 46020 - }, - { - "epoch": 0.40692020721724215, - "grad_norm": 2.6240248680114746, - "learning_rate": 4.3217996546379295e-05, - "loss": 0.592, - "step": 46030 - }, - { - "epoch": 0.40700861047755443, - "grad_norm": 4.737199783325195, - "learning_rate": 4.321652315870743e-05, - "loss": 0.7852, - "step": 46040 - }, - { - "epoch": 0.40709701373786666, - "grad_norm": 6.359683036804199, - "learning_rate": 4.321504977103556e-05, - "loss": 0.6721, - "step": 46050 - }, - { - "epoch": 0.4071854169981789, - "grad_norm": 2.270880699157715, - "learning_rate": 4.321357638336369e-05, - "loss": 0.6013, - "step": 46060 - }, - { - "epoch": 0.4072738202584911, - "grad_norm": 6.392951488494873, - "learning_rate": 4.3212102995691815e-05, - "loss": 0.8781, - "step": 46070 - }, - { - "epoch": 0.40736222351880336, - "grad_norm": 2.4666483402252197, - "learning_rate": 4.321062960801995e-05, - "loss": 0.6326, - "step": 46080 - }, - { - "epoch": 0.4074506267791156, - "grad_norm": 6.785998344421387, - "learning_rate": 4.320915622034807e-05, - "loss": 0.7256, - "step": 46090 - }, - { - "epoch": 0.4075390300394279, - "grad_norm": 4.3129472732543945, - "learning_rate": 4.320768283267621e-05, - "loss": 0.7233, - "step": 46100 - }, - { - "epoch": 0.4076274332997401, - "grad_norm": 2.7423253059387207, - "learning_rate": 4.3206209445004335e-05, - "loss": 0.5911, - "step": 46110 - }, - { - "epoch": 0.40771583656005234, - "grad_norm": 4.845990180969238, - "learning_rate": 4.3204736057332464e-05, - "loss": 0.7796, - "step": 46120 - }, - { - "epoch": 0.40780423982036457, - "grad_norm": 6.406247138977051, - "learning_rate": 4.320326266966059e-05, - "loss": 0.7896, - "step": 46130 - }, - { - "epoch": 0.4078926430806768, - "grad_norm": 5.142701625823975, - "learning_rate": 4.320178928198872e-05, - "loss": 0.7059, - "step": 46140 - }, - { - "epoch": 0.40798104634098903, - "grad_norm": 6.629715442657471, - "learning_rate": 4.320031589431685e-05, - "loss": 0.7531, - "step": 46150 - }, - { - "epoch": 0.4080694496013013, - "grad_norm": 2.1500115394592285, - "learning_rate": 4.3198842506644984e-05, - "loss": 0.7188, - "step": 46160 - }, - { - "epoch": 0.40815785286161355, - "grad_norm": 3.7265231609344482, - "learning_rate": 4.3197369118973105e-05, - "loss": 0.6814, - "step": 46170 - }, - { - "epoch": 0.4082462561219258, - "grad_norm": 5.071183681488037, - "learning_rate": 4.319589573130124e-05, - "loss": 0.7336, - "step": 46180 - }, - { - "epoch": 0.408334659382238, - "grad_norm": 4.056783199310303, - "learning_rate": 4.319442234362937e-05, - "loss": 0.6939, - "step": 46190 - }, - { - "epoch": 0.40842306264255024, - "grad_norm": 4.755985260009766, - "learning_rate": 4.31929489559575e-05, - "loss": 0.6514, - "step": 46200 - }, - { - "epoch": 0.4085114659028625, - "grad_norm": 1.7640128135681152, - "learning_rate": 4.3191475568285625e-05, - "loss": 0.7348, - "step": 46210 - }, - { - "epoch": 0.40859986916317476, - "grad_norm": 8.810541152954102, - "learning_rate": 4.319000218061376e-05, - "loss": 0.6172, - "step": 46220 - }, - { - "epoch": 0.408688272423487, - "grad_norm": 10.951849937438965, - "learning_rate": 4.318852879294188e-05, - "loss": 0.8489, - "step": 46230 - }, - { - "epoch": 0.4087766756837992, - "grad_norm": 5.246570110321045, - "learning_rate": 4.318705540527002e-05, - "loss": 0.7214, - "step": 46240 - }, - { - "epoch": 0.40886507894411145, - "grad_norm": 2.8555872440338135, - "learning_rate": 4.318558201759814e-05, - "loss": 0.6974, - "step": 46250 - }, - { - "epoch": 0.4089534822044237, - "grad_norm": 6.7091264724731445, - "learning_rate": 4.3184108629926274e-05, - "loss": 0.6762, - "step": 46260 - }, - { - "epoch": 0.40904188546473597, - "grad_norm": 4.609272480010986, - "learning_rate": 4.31826352422544e-05, - "loss": 0.5934, - "step": 46270 - }, - { - "epoch": 0.4091302887250482, - "grad_norm": 8.799849510192871, - "learning_rate": 4.318116185458253e-05, - "loss": 0.7227, - "step": 46280 - }, - { - "epoch": 0.4092186919853604, - "grad_norm": 7.23222017288208, - "learning_rate": 4.317968846691066e-05, - "loss": 0.7064, - "step": 46290 - }, - { - "epoch": 0.40930709524567266, - "grad_norm": 3.8420917987823486, - "learning_rate": 4.3178215079238794e-05, - "loss": 0.6214, - "step": 46300 - }, - { - "epoch": 0.4093954985059849, - "grad_norm": 2.3287384510040283, - "learning_rate": 4.3176741691566916e-05, - "loss": 0.7229, - "step": 46310 - }, - { - "epoch": 0.4094839017662971, - "grad_norm": 6.242074012756348, - "learning_rate": 4.317526830389505e-05, - "loss": 0.7189, - "step": 46320 - }, - { - "epoch": 0.4095723050266094, - "grad_norm": 8.26706600189209, - "learning_rate": 4.317379491622318e-05, - "loss": 0.826, - "step": 46330 - }, - { - "epoch": 0.40966070828692164, - "grad_norm": 8.299762725830078, - "learning_rate": 4.317232152855131e-05, - "loss": 0.7992, - "step": 46340 - }, - { - "epoch": 0.40974911154723387, - "grad_norm": 1.838484764099121, - "learning_rate": 4.3170848140879436e-05, - "loss": 0.7939, - "step": 46350 - }, - { - "epoch": 0.4098375148075461, - "grad_norm": 2.187119483947754, - "learning_rate": 4.3169374753207564e-05, - "loss": 0.7924, - "step": 46360 - }, - { - "epoch": 0.40992591806785833, - "grad_norm": 4.523279666900635, - "learning_rate": 4.316790136553569e-05, - "loss": 0.6176, - "step": 46370 - }, - { - "epoch": 0.41001432132817056, - "grad_norm": 10.365835189819336, - "learning_rate": 4.316642797786383e-05, - "loss": 0.7321, - "step": 46380 - }, - { - "epoch": 0.41010272458848285, - "grad_norm": 1.1902995109558105, - "learning_rate": 4.316495459019195e-05, - "loss": 0.6697, - "step": 46390 - }, - { - "epoch": 0.4101911278487951, - "grad_norm": 5.285069942474365, - "learning_rate": 4.3163481202520085e-05, - "loss": 0.7626, - "step": 46400 - }, - { - "epoch": 0.4102795311091073, - "grad_norm": 6.092878341674805, - "learning_rate": 4.316200781484821e-05, - "loss": 0.6946, - "step": 46410 - }, - { - "epoch": 0.41036793436941954, - "grad_norm": 3.1772255897521973, - "learning_rate": 4.316053442717634e-05, - "loss": 0.6973, - "step": 46420 - }, - { - "epoch": 0.41045633762973177, - "grad_norm": 5.581402778625488, - "learning_rate": 4.315906103950447e-05, - "loss": 0.6844, - "step": 46430 - }, - { - "epoch": 0.410544740890044, - "grad_norm": 2.424489974975586, - "learning_rate": 4.3157587651832605e-05, - "loss": 0.7254, - "step": 46440 - }, - { - "epoch": 0.4106331441503563, - "grad_norm": 12.22135066986084, - "learning_rate": 4.3156114264160726e-05, - "loss": 0.641, - "step": 46450 - }, - { - "epoch": 0.4107215474106685, - "grad_norm": 4.146501064300537, - "learning_rate": 4.315464087648886e-05, - "loss": 0.7661, - "step": 46460 - }, - { - "epoch": 0.41080995067098075, - "grad_norm": 2.738677978515625, - "learning_rate": 4.315316748881699e-05, - "loss": 0.6031, - "step": 46470 - }, - { - "epoch": 0.410898353931293, - "grad_norm": 6.335766315460205, - "learning_rate": 4.315169410114512e-05, - "loss": 0.6432, - "step": 46480 - }, - { - "epoch": 0.4109867571916052, - "grad_norm": 7.835579872131348, - "learning_rate": 4.3150220713473247e-05, - "loss": 0.7454, - "step": 46490 - }, - { - "epoch": 0.41107516045191744, - "grad_norm": 6.93509578704834, - "learning_rate": 4.3148747325801375e-05, - "loss": 0.7536, - "step": 46500 - }, - { - "epoch": 0.41116356371222973, - "grad_norm": 3.279168128967285, - "learning_rate": 4.31472739381295e-05, - "loss": 0.8043, - "step": 46510 - }, - { - "epoch": 0.41125196697254196, - "grad_norm": 1.5511794090270996, - "learning_rate": 4.314580055045764e-05, - "loss": 0.5827, - "step": 46520 - }, - { - "epoch": 0.4113403702328542, - "grad_norm": 3.3686983585357666, - "learning_rate": 4.314432716278577e-05, - "loss": 0.8558, - "step": 46530 - }, - { - "epoch": 0.4114287734931664, - "grad_norm": 4.269227504730225, - "learning_rate": 4.3142853775113895e-05, - "loss": 0.7983, - "step": 46540 - }, - { - "epoch": 0.41151717675347865, - "grad_norm": 3.802067756652832, - "learning_rate": 4.3141380387442023e-05, - "loss": 0.8038, - "step": 46550 - }, - { - "epoch": 0.4116055800137909, - "grad_norm": 3.33076548576355, - "learning_rate": 4.313990699977015e-05, - "loss": 0.6559, - "step": 46560 - }, - { - "epoch": 0.41169398327410317, - "grad_norm": 3.8315722942352295, - "learning_rate": 4.313843361209828e-05, - "loss": 0.7363, - "step": 46570 - }, - { - "epoch": 0.4117823865344154, - "grad_norm": 4.44300651550293, - "learning_rate": 4.3136960224426415e-05, - "loss": 0.8118, - "step": 46580 - }, - { - "epoch": 0.41187078979472763, - "grad_norm": 1.329114556312561, - "learning_rate": 4.3135486836754544e-05, - "loss": 0.5832, - "step": 46590 - }, - { - "epoch": 0.41195919305503986, - "grad_norm": 1.7089744806289673, - "learning_rate": 4.313401344908267e-05, - "loss": 0.6526, - "step": 46600 - }, - { - "epoch": 0.4120475963153521, - "grad_norm": 6.235609531402588, - "learning_rate": 4.31325400614108e-05, - "loss": 0.6764, - "step": 46610 - }, - { - "epoch": 0.4121359995756643, - "grad_norm": 1.7314995527267456, - "learning_rate": 4.313106667373893e-05, - "loss": 0.5767, - "step": 46620 - }, - { - "epoch": 0.4122244028359766, - "grad_norm": 4.788182735443115, - "learning_rate": 4.3129593286067064e-05, - "loss": 0.7667, - "step": 46630 - }, - { - "epoch": 0.41231280609628884, - "grad_norm": 2.273735284805298, - "learning_rate": 4.3128119898395185e-05, - "loss": 0.5933, - "step": 46640 - }, - { - "epoch": 0.4124012093566011, - "grad_norm": 9.519006729125977, - "learning_rate": 4.312664651072332e-05, - "loss": 0.6746, - "step": 46650 - }, - { - "epoch": 0.4124896126169133, - "grad_norm": 10.015620231628418, - "learning_rate": 4.312517312305145e-05, - "loss": 0.6826, - "step": 46660 - }, - { - "epoch": 0.41257801587722553, - "grad_norm": 4.5188798904418945, - "learning_rate": 4.312369973537958e-05, - "loss": 0.7965, - "step": 46670 - }, - { - "epoch": 0.41266641913753777, - "grad_norm": 11.5077486038208, - "learning_rate": 4.3122226347707706e-05, - "loss": 0.7107, - "step": 46680 - }, - { - "epoch": 0.41275482239785005, - "grad_norm": 4.420197486877441, - "learning_rate": 4.312075296003584e-05, - "loss": 0.8013, - "step": 46690 - }, - { - "epoch": 0.4128432256581623, - "grad_norm": 3.409897565841675, - "learning_rate": 4.311927957236396e-05, - "loss": 0.5752, - "step": 46700 - }, - { - "epoch": 0.4129316289184745, - "grad_norm": 5.816707134246826, - "learning_rate": 4.31178061846921e-05, - "loss": 0.8147, - "step": 46710 - }, - { - "epoch": 0.41302003217878674, - "grad_norm": 2.3938848972320557, - "learning_rate": 4.311633279702022e-05, - "loss": 0.6432, - "step": 46720 - }, - { - "epoch": 0.413108435439099, - "grad_norm": 6.591314315795898, - "learning_rate": 4.3114859409348354e-05, - "loss": 0.5776, - "step": 46730 - }, - { - "epoch": 0.41319683869941126, - "grad_norm": 3.783867835998535, - "learning_rate": 4.311338602167648e-05, - "loss": 0.8421, - "step": 46740 - }, - { - "epoch": 0.4132852419597235, - "grad_norm": 3.204338788986206, - "learning_rate": 4.311191263400461e-05, - "loss": 0.8047, - "step": 46750 - }, - { - "epoch": 0.4133736452200357, - "grad_norm": 4.927525997161865, - "learning_rate": 4.311043924633274e-05, - "loss": 0.761, - "step": 46760 - }, - { - "epoch": 0.41346204848034795, - "grad_norm": 3.0009915828704834, - "learning_rate": 4.3108965858660874e-05, - "loss": 0.6035, - "step": 46770 - }, - { - "epoch": 0.4135504517406602, - "grad_norm": 3.8898062705993652, - "learning_rate": 4.3107492470988996e-05, - "loss": 0.7727, - "step": 46780 - }, - { - "epoch": 0.4136388550009724, - "grad_norm": 9.556239128112793, - "learning_rate": 4.310601908331713e-05, - "loss": 0.5699, - "step": 46790 - }, - { - "epoch": 0.4137272582612847, - "grad_norm": 4.684115409851074, - "learning_rate": 4.310454569564526e-05, - "loss": 0.7099, - "step": 46800 - }, - { - "epoch": 0.41381566152159693, - "grad_norm": 10.142693519592285, - "learning_rate": 4.310307230797339e-05, - "loss": 0.7854, - "step": 46810 - }, - { - "epoch": 0.41390406478190916, - "grad_norm": 8.784010887145996, - "learning_rate": 4.3101598920301516e-05, - "loss": 0.7731, - "step": 46820 - }, - { - "epoch": 0.4139924680422214, - "grad_norm": 3.1128392219543457, - "learning_rate": 4.3100125532629644e-05, - "loss": 0.6951, - "step": 46830 - }, - { - "epoch": 0.4140808713025336, - "grad_norm": 4.86821174621582, - "learning_rate": 4.309865214495777e-05, - "loss": 0.7843, - "step": 46840 - }, - { - "epoch": 0.41416927456284586, - "grad_norm": 5.835916996002197, - "learning_rate": 4.309717875728591e-05, - "loss": 0.715, - "step": 46850 - }, - { - "epoch": 0.41425767782315814, - "grad_norm": 5.9440131187438965, - "learning_rate": 4.309570536961403e-05, - "loss": 0.9036, - "step": 46860 - }, - { - "epoch": 0.4143460810834704, - "grad_norm": 3.788001775741577, - "learning_rate": 4.3094231981942165e-05, - "loss": 0.7966, - "step": 46870 - }, - { - "epoch": 0.4144344843437826, - "grad_norm": 3.809252977371216, - "learning_rate": 4.309275859427029e-05, - "loss": 0.7644, - "step": 46880 - }, - { - "epoch": 0.41452288760409484, - "grad_norm": 2.9296512603759766, - "learning_rate": 4.309128520659842e-05, - "loss": 0.6768, - "step": 46890 - }, - { - "epoch": 0.41461129086440707, - "grad_norm": 3.632952928543091, - "learning_rate": 4.308981181892655e-05, - "loss": 0.792, - "step": 46900 - }, - { - "epoch": 0.4146996941247193, - "grad_norm": 2.2417125701904297, - "learning_rate": 4.3088338431254685e-05, - "loss": 0.557, - "step": 46910 - }, - { - "epoch": 0.4147880973850316, - "grad_norm": 4.355308532714844, - "learning_rate": 4.3086865043582806e-05, - "loss": 0.7944, - "step": 46920 - }, - { - "epoch": 0.4148765006453438, - "grad_norm": 7.980536937713623, - "learning_rate": 4.308539165591094e-05, - "loss": 0.7624, - "step": 46930 - }, - { - "epoch": 0.41496490390565605, - "grad_norm": 9.85515308380127, - "learning_rate": 4.308391826823907e-05, - "loss": 0.7009, - "step": 46940 - }, - { - "epoch": 0.4150533071659683, - "grad_norm": 7.113284111022949, - "learning_rate": 4.30824448805672e-05, - "loss": 0.7405, - "step": 46950 - }, - { - "epoch": 0.4151417104262805, - "grad_norm": 5.405037879943848, - "learning_rate": 4.3080971492895327e-05, - "loss": 0.7009, - "step": 46960 - }, - { - "epoch": 0.41523011368659274, - "grad_norm": 3.425945520401001, - "learning_rate": 4.3079498105223455e-05, - "loss": 0.7684, - "step": 46970 - }, - { - "epoch": 0.415318516946905, - "grad_norm": 2.992239236831665, - "learning_rate": 4.307802471755158e-05, - "loss": 0.6517, - "step": 46980 - }, - { - "epoch": 0.41540692020721726, - "grad_norm": 2.573235511779785, - "learning_rate": 4.307655132987972e-05, - "loss": 0.6795, - "step": 46990 - }, - { - "epoch": 0.4154953234675295, - "grad_norm": 2.5305981636047363, - "learning_rate": 4.307507794220784e-05, - "loss": 0.6851, - "step": 47000 - }, - { - "epoch": 0.4155837267278417, - "grad_norm": 2.1519672870635986, - "learning_rate": 4.3073604554535975e-05, - "loss": 0.619, - "step": 47010 - }, - { - "epoch": 0.41567212998815395, - "grad_norm": 5.283392906188965, - "learning_rate": 4.3072131166864103e-05, - "loss": 0.8358, - "step": 47020 - }, - { - "epoch": 0.4157605332484662, - "grad_norm": 3.2216572761535645, - "learning_rate": 4.307065777919223e-05, - "loss": 0.7383, - "step": 47030 - }, - { - "epoch": 0.41584893650877847, - "grad_norm": 2.4524753093719482, - "learning_rate": 4.306918439152036e-05, - "loss": 0.7274, - "step": 47040 - }, - { - "epoch": 0.4159373397690907, - "grad_norm": 3.2131965160369873, - "learning_rate": 4.3067711003848495e-05, - "loss": 0.6277, - "step": 47050 - }, - { - "epoch": 0.41602574302940293, - "grad_norm": 1.1415796279907227, - "learning_rate": 4.306623761617662e-05, - "loss": 0.624, - "step": 47060 - }, - { - "epoch": 0.41611414628971516, - "grad_norm": 3.8114659786224365, - "learning_rate": 4.306476422850475e-05, - "loss": 0.6871, - "step": 47070 - }, - { - "epoch": 0.4162025495500274, - "grad_norm": 3.4970264434814453, - "learning_rate": 4.3063290840832874e-05, - "loss": 0.6781, - "step": 47080 - }, - { - "epoch": 0.4162909528103396, - "grad_norm": 3.1682686805725098, - "learning_rate": 4.306181745316101e-05, - "loss": 0.6686, - "step": 47090 - }, - { - "epoch": 0.4163793560706519, - "grad_norm": 2.582796573638916, - "learning_rate": 4.306034406548914e-05, - "loss": 0.7516, - "step": 47100 - }, - { - "epoch": 0.41646775933096414, - "grad_norm": 1.6551960706710815, - "learning_rate": 4.3058870677817265e-05, - "loss": 0.6789, - "step": 47110 - }, - { - "epoch": 0.41655616259127637, - "grad_norm": 2.9809072017669678, - "learning_rate": 4.3057397290145394e-05, - "loss": 0.619, - "step": 47120 - }, - { - "epoch": 0.4166445658515886, - "grad_norm": 4.427015781402588, - "learning_rate": 4.305592390247353e-05, - "loss": 0.6953, - "step": 47130 - }, - { - "epoch": 0.41673296911190083, - "grad_norm": 2.082231283187866, - "learning_rate": 4.305445051480165e-05, - "loss": 0.7311, - "step": 47140 - }, - { - "epoch": 0.41682137237221306, - "grad_norm": 2.5461673736572266, - "learning_rate": 4.3052977127129786e-05, - "loss": 0.6575, - "step": 47150 - }, - { - "epoch": 0.41690977563252535, - "grad_norm": 2.1919140815734863, - "learning_rate": 4.3051503739457914e-05, - "loss": 0.7268, - "step": 47160 - }, - { - "epoch": 0.4169981788928376, - "grad_norm": 2.6959102153778076, - "learning_rate": 4.305003035178604e-05, - "loss": 0.6995, - "step": 47170 - }, - { - "epoch": 0.4170865821531498, - "grad_norm": 4.8872456550598145, - "learning_rate": 4.304855696411417e-05, - "loss": 0.7612, - "step": 47180 - }, - { - "epoch": 0.41717498541346204, - "grad_norm": 1.840260624885559, - "learning_rate": 4.30470835764423e-05, - "loss": 0.5063, - "step": 47190 - }, - { - "epoch": 0.41726338867377427, - "grad_norm": 1.9699889421463013, - "learning_rate": 4.304561018877043e-05, - "loss": 0.7761, - "step": 47200 - }, - { - "epoch": 0.4173517919340865, - "grad_norm": 6.508878231048584, - "learning_rate": 4.304413680109856e-05, - "loss": 0.618, - "step": 47210 - }, - { - "epoch": 0.4174401951943988, - "grad_norm": 4.580071449279785, - "learning_rate": 4.3042663413426684e-05, - "loss": 0.6685, - "step": 47220 - }, - { - "epoch": 0.417528598454711, - "grad_norm": 1.9174926280975342, - "learning_rate": 4.304119002575482e-05, - "loss": 0.8065, - "step": 47230 - }, - { - "epoch": 0.41761700171502325, - "grad_norm": 4.042043209075928, - "learning_rate": 4.303971663808295e-05, - "loss": 0.747, - "step": 47240 - }, - { - "epoch": 0.4177054049753355, - "grad_norm": 5.927581310272217, - "learning_rate": 4.3038243250411076e-05, - "loss": 0.7249, - "step": 47250 - }, - { - "epoch": 0.4177938082356477, - "grad_norm": 7.885900974273682, - "learning_rate": 4.3036769862739204e-05, - "loss": 0.6813, - "step": 47260 - }, - { - "epoch": 0.41788221149596, - "grad_norm": 3.979492425918579, - "learning_rate": 4.303529647506734e-05, - "loss": 0.6934, - "step": 47270 - }, - { - "epoch": 0.41797061475627223, - "grad_norm": 13.328450202941895, - "learning_rate": 4.303382308739546e-05, - "loss": 0.7064, - "step": 47280 - }, - { - "epoch": 0.41805901801658446, - "grad_norm": 4.928228378295898, - "learning_rate": 4.3032349699723596e-05, - "loss": 0.7181, - "step": 47290 - }, - { - "epoch": 0.4181474212768967, - "grad_norm": 9.209067344665527, - "learning_rate": 4.303087631205172e-05, - "loss": 0.7984, - "step": 47300 - }, - { - "epoch": 0.4182358245372089, - "grad_norm": 1.4937260150909424, - "learning_rate": 4.302940292437985e-05, - "loss": 0.7779, - "step": 47310 - }, - { - "epoch": 0.41832422779752115, - "grad_norm": 5.342955589294434, - "learning_rate": 4.302792953670798e-05, - "loss": 0.7885, - "step": 47320 - }, - { - "epoch": 0.41841263105783344, - "grad_norm": 2.092026710510254, - "learning_rate": 4.302645614903611e-05, - "loss": 0.6582, - "step": 47330 - }, - { - "epoch": 0.41850103431814567, - "grad_norm": 7.898820400238037, - "learning_rate": 4.302498276136424e-05, - "loss": 0.7518, - "step": 47340 - }, - { - "epoch": 0.4185894375784579, - "grad_norm": 2.6180171966552734, - "learning_rate": 4.302350937369237e-05, - "loss": 0.6744, - "step": 47350 - }, - { - "epoch": 0.41867784083877013, - "grad_norm": 2.7855191230773926, - "learning_rate": 4.30220359860205e-05, - "loss": 0.6565, - "step": 47360 - }, - { - "epoch": 0.41876624409908236, - "grad_norm": 5.365550518035889, - "learning_rate": 4.302056259834863e-05, - "loss": 0.7078, - "step": 47370 - }, - { - "epoch": 0.4188546473593946, - "grad_norm": 5.605372905731201, - "learning_rate": 4.301908921067676e-05, - "loss": 0.703, - "step": 47380 - }, - { - "epoch": 0.4189430506197069, - "grad_norm": 9.14799690246582, - "learning_rate": 4.3017615823004886e-05, - "loss": 0.5925, - "step": 47390 - }, - { - "epoch": 0.4190314538800191, - "grad_norm": 4.325501441955566, - "learning_rate": 4.3016142435333015e-05, - "loss": 0.7511, - "step": 47400 - }, - { - "epoch": 0.41911985714033134, - "grad_norm": 2.7949655055999756, - "learning_rate": 4.301466904766115e-05, - "loss": 0.7299, - "step": 47410 - }, - { - "epoch": 0.4192082604006436, - "grad_norm": 5.639239311218262, - "learning_rate": 4.301319565998928e-05, - "loss": 0.804, - "step": 47420 - }, - { - "epoch": 0.4192966636609558, - "grad_norm": 1.233499526977539, - "learning_rate": 4.301172227231741e-05, - "loss": 0.7503, - "step": 47430 - }, - { - "epoch": 0.41938506692126803, - "grad_norm": 4.049893379211426, - "learning_rate": 4.3010248884645535e-05, - "loss": 0.762, - "step": 47440 - }, - { - "epoch": 0.4194734701815803, - "grad_norm": 2.9958951473236084, - "learning_rate": 4.300877549697366e-05, - "loss": 0.7079, - "step": 47450 - }, - { - "epoch": 0.41956187344189255, - "grad_norm": 2.1400082111358643, - "learning_rate": 4.300730210930179e-05, - "loss": 0.7352, - "step": 47460 - }, - { - "epoch": 0.4196502767022048, - "grad_norm": 3.1742537021636963, - "learning_rate": 4.300582872162992e-05, - "loss": 0.7544, - "step": 47470 - }, - { - "epoch": 0.419738679962517, - "grad_norm": 3.2784366607666016, - "learning_rate": 4.3004355333958055e-05, - "loss": 0.8853, - "step": 47480 - }, - { - "epoch": 0.41982708322282924, - "grad_norm": 5.5541815757751465, - "learning_rate": 4.3002881946286184e-05, - "loss": 0.7872, - "step": 47490 - }, - { - "epoch": 0.4199154864831415, - "grad_norm": 1.592294692993164, - "learning_rate": 4.300140855861431e-05, - "loss": 0.6647, - "step": 47500 - }, - { - "epoch": 0.42000388974345376, - "grad_norm": 2.8090524673461914, - "learning_rate": 4.299993517094244e-05, - "loss": 0.708, - "step": 47510 - }, - { - "epoch": 0.420092293003766, - "grad_norm": 2.53983736038208, - "learning_rate": 4.299846178327057e-05, - "loss": 0.7153, - "step": 47520 - }, - { - "epoch": 0.4201806962640782, - "grad_norm": 3.2868728637695312, - "learning_rate": 4.29969883955987e-05, - "loss": 0.6632, - "step": 47530 - }, - { - "epoch": 0.42026909952439045, - "grad_norm": 3.456585168838501, - "learning_rate": 4.299551500792683e-05, - "loss": 0.745, - "step": 47540 - }, - { - "epoch": 0.4203575027847027, - "grad_norm": 4.485711574554443, - "learning_rate": 4.2994041620254954e-05, - "loss": 0.7192, - "step": 47550 - }, - { - "epoch": 0.4204459060450149, - "grad_norm": 4.93747615814209, - "learning_rate": 4.299256823258309e-05, - "loss": 0.76, - "step": 47560 - }, - { - "epoch": 0.4205343093053272, - "grad_norm": 1.0447183847427368, - "learning_rate": 4.299109484491122e-05, - "loss": 0.6799, - "step": 47570 - }, - { - "epoch": 0.42062271256563943, - "grad_norm": 1.838577389717102, - "learning_rate": 4.2989621457239346e-05, - "loss": 0.62, - "step": 47580 - }, - { - "epoch": 0.42071111582595166, - "grad_norm": 5.7548017501831055, - "learning_rate": 4.2988148069567474e-05, - "loss": 0.6714, - "step": 47590 - }, - { - "epoch": 0.4207995190862639, - "grad_norm": 5.040812015533447, - "learning_rate": 4.298667468189561e-05, - "loss": 0.5932, - "step": 47600 - }, - { - "epoch": 0.4208879223465761, - "grad_norm": 2.7236719131469727, - "learning_rate": 4.298520129422373e-05, - "loss": 0.7645, - "step": 47610 - }, - { - "epoch": 0.42097632560688836, - "grad_norm": 8.309236526489258, - "learning_rate": 4.2983727906551866e-05, - "loss": 0.6699, - "step": 47620 - }, - { - "epoch": 0.42106472886720064, - "grad_norm": 9.064888000488281, - "learning_rate": 4.2982254518879994e-05, - "loss": 0.7382, - "step": 47630 - }, - { - "epoch": 0.4211531321275129, - "grad_norm": 9.858525276184082, - "learning_rate": 4.298078113120812e-05, - "loss": 0.7338, - "step": 47640 - }, - { - "epoch": 0.4212415353878251, - "grad_norm": 6.124359607696533, - "learning_rate": 4.297930774353625e-05, - "loss": 0.6258, - "step": 47650 - }, - { - "epoch": 0.42132993864813734, - "grad_norm": 2.7527408599853516, - "learning_rate": 4.297783435586438e-05, - "loss": 0.8399, - "step": 47660 - }, - { - "epoch": 0.42141834190844957, - "grad_norm": 3.949218988418579, - "learning_rate": 4.297636096819251e-05, - "loss": 0.6427, - "step": 47670 - }, - { - "epoch": 0.4215067451687618, - "grad_norm": 3.412393569946289, - "learning_rate": 4.297488758052064e-05, - "loss": 0.6616, - "step": 47680 - }, - { - "epoch": 0.4215951484290741, - "grad_norm": 5.252253532409668, - "learning_rate": 4.2973414192848764e-05, - "loss": 0.6492, - "step": 47690 - }, - { - "epoch": 0.4216835516893863, - "grad_norm": 8.8892240524292, - "learning_rate": 4.29719408051769e-05, - "loss": 0.8107, - "step": 47700 - }, - { - "epoch": 0.42177195494969855, - "grad_norm": 1.4994401931762695, - "learning_rate": 4.297046741750503e-05, - "loss": 0.6338, - "step": 47710 - }, - { - "epoch": 0.4218603582100108, - "grad_norm": 2.5555267333984375, - "learning_rate": 4.2968994029833156e-05, - "loss": 0.7034, - "step": 47720 - }, - { - "epoch": 0.421948761470323, - "grad_norm": 7.5677714347839355, - "learning_rate": 4.2967520642161284e-05, - "loss": 0.7261, - "step": 47730 - }, - { - "epoch": 0.42203716473063524, - "grad_norm": 4.397103309631348, - "learning_rate": 4.296604725448942e-05, - "loss": 0.766, - "step": 47740 - }, - { - "epoch": 0.4221255679909475, - "grad_norm": 1.9925589561462402, - "learning_rate": 4.296457386681754e-05, - "loss": 0.6313, - "step": 47750 - }, - { - "epoch": 0.42221397125125976, - "grad_norm": 6.518177032470703, - "learning_rate": 4.2963100479145676e-05, - "loss": 0.8554, - "step": 47760 - }, - { - "epoch": 0.422302374511572, - "grad_norm": 3.5906131267547607, - "learning_rate": 4.2961627091473805e-05, - "loss": 0.7362, - "step": 47770 - }, - { - "epoch": 0.4223907777718842, - "grad_norm": 3.3934285640716553, - "learning_rate": 4.296015370380193e-05, - "loss": 0.7243, - "step": 47780 - }, - { - "epoch": 0.42247918103219645, - "grad_norm": 5.146400451660156, - "learning_rate": 4.295868031613006e-05, - "loss": 0.7424, - "step": 47790 - }, - { - "epoch": 0.4225675842925087, - "grad_norm": 14.254595756530762, - "learning_rate": 4.295720692845819e-05, - "loss": 0.7237, - "step": 47800 - }, - { - "epoch": 0.42265598755282097, - "grad_norm": 4.460474967956543, - "learning_rate": 4.295573354078632e-05, - "loss": 0.7145, - "step": 47810 - }, - { - "epoch": 0.4227443908131332, - "grad_norm": 2.277794599533081, - "learning_rate": 4.295426015311445e-05, - "loss": 0.632, - "step": 47820 - }, - { - "epoch": 0.42283279407344543, - "grad_norm": 3.8273043632507324, - "learning_rate": 4.2952786765442575e-05, - "loss": 0.5973, - "step": 47830 - }, - { - "epoch": 0.42292119733375766, - "grad_norm": 2.9968783855438232, - "learning_rate": 4.295131337777071e-05, - "loss": 0.7608, - "step": 47840 - }, - { - "epoch": 0.4230096005940699, - "grad_norm": 5.234260082244873, - "learning_rate": 4.294983999009884e-05, - "loss": 0.6281, - "step": 47850 - }, - { - "epoch": 0.4230980038543822, - "grad_norm": 13.368101119995117, - "learning_rate": 4.2948366602426967e-05, - "loss": 0.7162, - "step": 47860 - }, - { - "epoch": 0.4231864071146944, - "grad_norm": 1.5968952178955078, - "learning_rate": 4.2946893214755095e-05, - "loss": 0.6554, - "step": 47870 - }, - { - "epoch": 0.42327481037500664, - "grad_norm": 6.714796543121338, - "learning_rate": 4.294541982708323e-05, - "loss": 0.7419, - "step": 47880 - }, - { - "epoch": 0.42336321363531887, - "grad_norm": 6.458313941955566, - "learning_rate": 4.294394643941135e-05, - "loss": 0.6574, - "step": 47890 - }, - { - "epoch": 0.4234516168956311, - "grad_norm": 6.0132951736450195, - "learning_rate": 4.294247305173949e-05, - "loss": 0.8116, - "step": 47900 - }, - { - "epoch": 0.42354002015594333, - "grad_norm": 2.5821726322174072, - "learning_rate": 4.294099966406761e-05, - "loss": 0.6987, - "step": 47910 - }, - { - "epoch": 0.4236284234162556, - "grad_norm": 1.2777179479599, - "learning_rate": 4.2939526276395743e-05, - "loss": 0.6982, - "step": 47920 - }, - { - "epoch": 0.42371682667656785, - "grad_norm": 6.209251403808594, - "learning_rate": 4.293805288872387e-05, - "loss": 0.7179, - "step": 47930 - }, - { - "epoch": 0.4238052299368801, - "grad_norm": 2.6391115188598633, - "learning_rate": 4.2936579501052e-05, - "loss": 0.6967, - "step": 47940 - }, - { - "epoch": 0.4238936331971923, - "grad_norm": 5.176142692565918, - "learning_rate": 4.293510611338013e-05, - "loss": 0.7014, - "step": 47950 - }, - { - "epoch": 0.42398203645750454, - "grad_norm": 3.1526174545288086, - "learning_rate": 4.2933632725708264e-05, - "loss": 0.7503, - "step": 47960 - }, - { - "epoch": 0.42407043971781677, - "grad_norm": 3.8150854110717773, - "learning_rate": 4.2932159338036385e-05, - "loss": 0.6631, - "step": 47970 - }, - { - "epoch": 0.42415884297812906, - "grad_norm": 8.35955810546875, - "learning_rate": 4.293068595036452e-05, - "loss": 0.6546, - "step": 47980 - }, - { - "epoch": 0.4242472462384413, - "grad_norm": 3.098579168319702, - "learning_rate": 4.292921256269265e-05, - "loss": 0.7479, - "step": 47990 - }, - { - "epoch": 0.4243356494987535, - "grad_norm": 1.8033745288848877, - "learning_rate": 4.292773917502078e-05, - "loss": 0.6314, - "step": 48000 - }, - { - "epoch": 0.42442405275906575, - "grad_norm": 3.5809051990509033, - "learning_rate": 4.2926265787348905e-05, - "loss": 0.9129, - "step": 48010 - }, - { - "epoch": 0.424512456019378, - "grad_norm": 1.3962634801864624, - "learning_rate": 4.2924792399677034e-05, - "loss": 0.7015, - "step": 48020 - }, - { - "epoch": 0.4246008592796902, - "grad_norm": 1.5739784240722656, - "learning_rate": 4.292331901200516e-05, - "loss": 0.6883, - "step": 48030 - }, - { - "epoch": 0.4246892625400025, - "grad_norm": 5.076678276062012, - "learning_rate": 4.29218456243333e-05, - "loss": 0.6829, - "step": 48040 - }, - { - "epoch": 0.42477766580031473, - "grad_norm": 2.7974698543548584, - "learning_rate": 4.292037223666142e-05, - "loss": 0.8116, - "step": 48050 - }, - { - "epoch": 0.42486606906062696, - "grad_norm": 2.0378050804138184, - "learning_rate": 4.2918898848989554e-05, - "loss": 0.6363, - "step": 48060 - }, - { - "epoch": 0.4249544723209392, - "grad_norm": 9.311989784240723, - "learning_rate": 4.291742546131768e-05, - "loss": 0.799, - "step": 48070 - }, - { - "epoch": 0.4250428755812514, - "grad_norm": 3.0429904460906982, - "learning_rate": 4.291595207364581e-05, - "loss": 0.7563, - "step": 48080 - }, - { - "epoch": 0.42513127884156365, - "grad_norm": 1.8256853818893433, - "learning_rate": 4.291447868597394e-05, - "loss": 0.7063, - "step": 48090 - }, - { - "epoch": 0.42521968210187594, - "grad_norm": 3.6194562911987305, - "learning_rate": 4.2913005298302074e-05, - "loss": 0.6743, - "step": 48100 - }, - { - "epoch": 0.42530808536218817, - "grad_norm": 1.738796353340149, - "learning_rate": 4.2911531910630196e-05, - "loss": 0.665, - "step": 48110 - }, - { - "epoch": 0.4253964886225004, - "grad_norm": 3.6646478176116943, - "learning_rate": 4.291005852295833e-05, - "loss": 0.6546, - "step": 48120 - }, - { - "epoch": 0.42548489188281263, - "grad_norm": 2.307434320449829, - "learning_rate": 4.290858513528645e-05, - "loss": 0.6931, - "step": 48130 - }, - { - "epoch": 0.42557329514312486, - "grad_norm": 2.7822699546813965, - "learning_rate": 4.290711174761459e-05, - "loss": 0.6119, - "step": 48140 - }, - { - "epoch": 0.4256616984034371, - "grad_norm": 6.795778751373291, - "learning_rate": 4.2905638359942716e-05, - "loss": 0.8332, - "step": 48150 - }, - { - "epoch": 0.4257501016637494, - "grad_norm": 2.296041488647461, - "learning_rate": 4.2904164972270844e-05, - "loss": 0.7512, - "step": 48160 - }, - { - "epoch": 0.4258385049240616, - "grad_norm": 2.6243736743927, - "learning_rate": 4.290269158459897e-05, - "loss": 0.7573, - "step": 48170 - }, - { - "epoch": 0.42592690818437384, - "grad_norm": 2.2302157878875732, - "learning_rate": 4.290121819692711e-05, - "loss": 0.7542, - "step": 48180 - }, - { - "epoch": 0.4260153114446861, - "grad_norm": 3.0792062282562256, - "learning_rate": 4.289974480925523e-05, - "loss": 0.722, - "step": 48190 - }, - { - "epoch": 0.4261037147049983, - "grad_norm": 3.389698028564453, - "learning_rate": 4.2898271421583364e-05, - "loss": 0.746, - "step": 48200 - }, - { - "epoch": 0.42619211796531054, - "grad_norm": 1.883493185043335, - "learning_rate": 4.289679803391149e-05, - "loss": 0.6322, - "step": 48210 - }, - { - "epoch": 0.4262805212256228, - "grad_norm": 7.4424943923950195, - "learning_rate": 4.289532464623962e-05, - "loss": 0.7688, - "step": 48220 - }, - { - "epoch": 0.42636892448593505, - "grad_norm": 5.436248779296875, - "learning_rate": 4.289385125856775e-05, - "loss": 0.7501, - "step": 48230 - }, - { - "epoch": 0.4264573277462473, - "grad_norm": 3.845896005630493, - "learning_rate": 4.2892377870895885e-05, - "loss": 0.5885, - "step": 48240 - }, - { - "epoch": 0.4265457310065595, - "grad_norm": 3.046862840652466, - "learning_rate": 4.2890904483224006e-05, - "loss": 0.782, - "step": 48250 - }, - { - "epoch": 0.42663413426687175, - "grad_norm": 2.5822560787200928, - "learning_rate": 4.288943109555214e-05, - "loss": 0.646, - "step": 48260 - }, - { - "epoch": 0.426722537527184, - "grad_norm": 11.160552024841309, - "learning_rate": 4.288795770788027e-05, - "loss": 0.707, - "step": 48270 - }, - { - "epoch": 0.42681094078749626, - "grad_norm": 2.8353912830352783, - "learning_rate": 4.28864843202084e-05, - "loss": 0.6083, - "step": 48280 - }, - { - "epoch": 0.4268993440478085, - "grad_norm": 10.617039680480957, - "learning_rate": 4.2885010932536526e-05, - "loss": 0.7785, - "step": 48290 - }, - { - "epoch": 0.4269877473081207, - "grad_norm": 5.462192058563232, - "learning_rate": 4.2883537544864655e-05, - "loss": 0.8402, - "step": 48300 - }, - { - "epoch": 0.42707615056843296, - "grad_norm": 2.9955265522003174, - "learning_rate": 4.288206415719278e-05, - "loss": 0.728, - "step": 48310 - }, - { - "epoch": 0.4271645538287452, - "grad_norm": 2.647054672241211, - "learning_rate": 4.288059076952092e-05, - "loss": 0.7728, - "step": 48320 - }, - { - "epoch": 0.4272529570890574, - "grad_norm": 5.712986946105957, - "learning_rate": 4.2879117381849047e-05, - "loss": 0.7458, - "step": 48330 - }, - { - "epoch": 0.4273413603493697, - "grad_norm": 3.097459316253662, - "learning_rate": 4.2877643994177175e-05, - "loss": 0.6847, - "step": 48340 - }, - { - "epoch": 0.42742976360968193, - "grad_norm": 4.371756076812744, - "learning_rate": 4.28761706065053e-05, - "loss": 0.6855, - "step": 48350 - }, - { - "epoch": 0.42751816686999417, - "grad_norm": 1.550958514213562, - "learning_rate": 4.287469721883343e-05, - "loss": 0.5489, - "step": 48360 - }, - { - "epoch": 0.4276065701303064, - "grad_norm": 1.1235427856445312, - "learning_rate": 4.287322383116156e-05, - "loss": 0.7475, - "step": 48370 - }, - { - "epoch": 0.4276949733906186, - "grad_norm": 4.641275882720947, - "learning_rate": 4.287175044348969e-05, - "loss": 0.7154, - "step": 48380 - }, - { - "epoch": 0.4277833766509309, - "grad_norm": 3.2773590087890625, - "learning_rate": 4.2870277055817823e-05, - "loss": 0.7551, - "step": 48390 - }, - { - "epoch": 0.42787177991124314, - "grad_norm": 1.6866613626480103, - "learning_rate": 4.286880366814595e-05, - "loss": 0.771, - "step": 48400 - }, - { - "epoch": 0.4279601831715554, - "grad_norm": 10.191542625427246, - "learning_rate": 4.286733028047408e-05, - "loss": 0.7619, - "step": 48410 - }, - { - "epoch": 0.4280485864318676, - "grad_norm": 3.14107084274292, - "learning_rate": 4.286585689280221e-05, - "loss": 0.6854, - "step": 48420 - }, - { - "epoch": 0.42813698969217984, - "grad_norm": 2.8374431133270264, - "learning_rate": 4.286438350513034e-05, - "loss": 0.6736, - "step": 48430 - }, - { - "epoch": 0.42822539295249207, - "grad_norm": 11.783573150634766, - "learning_rate": 4.2862910117458465e-05, - "loss": 0.7313, - "step": 48440 - }, - { - "epoch": 0.42831379621280435, - "grad_norm": 2.8745875358581543, - "learning_rate": 4.28614367297866e-05, - "loss": 0.7187, - "step": 48450 - }, - { - "epoch": 0.4284021994731166, - "grad_norm": 10.306466102600098, - "learning_rate": 4.285996334211473e-05, - "loss": 0.7406, - "step": 48460 - }, - { - "epoch": 0.4284906027334288, - "grad_norm": 4.1658735275268555, - "learning_rate": 4.285848995444286e-05, - "loss": 0.7009, - "step": 48470 - }, - { - "epoch": 0.42857900599374105, - "grad_norm": 4.120835781097412, - "learning_rate": 4.2857016566770985e-05, - "loss": 0.7448, - "step": 48480 - }, - { - "epoch": 0.4286674092540533, - "grad_norm": 1.7996022701263428, - "learning_rate": 4.2855543179099114e-05, - "loss": 0.6167, - "step": 48490 - }, - { - "epoch": 0.4287558125143655, - "grad_norm": 14.082418441772461, - "learning_rate": 4.285406979142724e-05, - "loss": 0.7737, - "step": 48500 - }, - { - "epoch": 0.4288442157746778, - "grad_norm": 3.8586723804473877, - "learning_rate": 4.285259640375538e-05, - "loss": 0.6981, - "step": 48510 - }, - { - "epoch": 0.42893261903499, - "grad_norm": 2.0205917358398438, - "learning_rate": 4.28511230160835e-05, - "loss": 0.6336, - "step": 48520 - }, - { - "epoch": 0.42902102229530226, - "grad_norm": 2.4731831550598145, - "learning_rate": 4.2849649628411634e-05, - "loss": 0.7425, - "step": 48530 - }, - { - "epoch": 0.4291094255556145, - "grad_norm": 7.64461088180542, - "learning_rate": 4.284817624073976e-05, - "loss": 0.8316, - "step": 48540 - }, - { - "epoch": 0.4291978288159267, - "grad_norm": 6.518263816833496, - "learning_rate": 4.284670285306789e-05, - "loss": 0.7612, - "step": 48550 - }, - { - "epoch": 0.42928623207623895, - "grad_norm": 3.602928638458252, - "learning_rate": 4.284522946539602e-05, - "loss": 0.7646, - "step": 48560 - }, - { - "epoch": 0.42937463533655124, - "grad_norm": 1.4430686235427856, - "learning_rate": 4.2843756077724154e-05, - "loss": 0.6019, - "step": 48570 - }, - { - "epoch": 0.42946303859686347, - "grad_norm": 2.056321144104004, - "learning_rate": 4.2842282690052276e-05, - "loss": 0.734, - "step": 48580 - }, - { - "epoch": 0.4295514418571757, - "grad_norm": 8.859973907470703, - "learning_rate": 4.284080930238041e-05, - "loss": 0.5719, - "step": 48590 - }, - { - "epoch": 0.42963984511748793, - "grad_norm": 1.0789034366607666, - "learning_rate": 4.283933591470853e-05, - "loss": 0.8037, - "step": 48600 - }, - { - "epoch": 0.42972824837780016, - "grad_norm": 9.667101860046387, - "learning_rate": 4.283786252703667e-05, - "loss": 0.6936, - "step": 48610 - }, - { - "epoch": 0.4298166516381124, - "grad_norm": 4.853716850280762, - "learning_rate": 4.2836389139364796e-05, - "loss": 0.8355, - "step": 48620 - }, - { - "epoch": 0.4299050548984247, - "grad_norm": 2.299448251724243, - "learning_rate": 4.2834915751692924e-05, - "loss": 0.7775, - "step": 48630 - }, - { - "epoch": 0.4299934581587369, - "grad_norm": 5.701653003692627, - "learning_rate": 4.283344236402105e-05, - "loss": 0.6251, - "step": 48640 - }, - { - "epoch": 0.43008186141904914, - "grad_norm": 4.0857834815979, - "learning_rate": 4.283196897634919e-05, - "loss": 0.6859, - "step": 48650 - }, - { - "epoch": 0.43017026467936137, - "grad_norm": 2.6716785430908203, - "learning_rate": 4.283049558867731e-05, - "loss": 0.7045, - "step": 48660 - }, - { - "epoch": 0.4302586679396736, - "grad_norm": 3.9186277389526367, - "learning_rate": 4.2829022201005445e-05, - "loss": 0.6712, - "step": 48670 - }, - { - "epoch": 0.43034707119998583, - "grad_norm": 1.6243484020233154, - "learning_rate": 4.282754881333357e-05, - "loss": 0.7508, - "step": 48680 - }, - { - "epoch": 0.4304354744602981, - "grad_norm": 2.530704975128174, - "learning_rate": 4.28260754256617e-05, - "loss": 0.7329, - "step": 48690 - }, - { - "epoch": 0.43052387772061035, - "grad_norm": 4.32342529296875, - "learning_rate": 4.282460203798983e-05, - "loss": 0.8491, - "step": 48700 - }, - { - "epoch": 0.4306122809809226, - "grad_norm": 1.8311306238174438, - "learning_rate": 4.2823128650317965e-05, - "loss": 0.7729, - "step": 48710 - }, - { - "epoch": 0.4307006842412348, - "grad_norm": 2.167034149169922, - "learning_rate": 4.2821655262646086e-05, - "loss": 0.764, - "step": 48720 - }, - { - "epoch": 0.43078908750154704, - "grad_norm": 4.249022006988525, - "learning_rate": 4.282018187497422e-05, - "loss": 0.7243, - "step": 48730 - }, - { - "epoch": 0.43087749076185927, - "grad_norm": 2.4285356998443604, - "learning_rate": 4.281870848730234e-05, - "loss": 0.8625, - "step": 48740 - }, - { - "epoch": 0.43096589402217156, - "grad_norm": 2.9705419540405273, - "learning_rate": 4.281723509963048e-05, - "loss": 0.6557, - "step": 48750 - }, - { - "epoch": 0.4310542972824838, - "grad_norm": 3.2237141132354736, - "learning_rate": 4.2815761711958606e-05, - "loss": 0.855, - "step": 48760 - }, - { - "epoch": 0.431142700542796, - "grad_norm": 8.296978950500488, - "learning_rate": 4.2814288324286735e-05, - "loss": 0.7049, - "step": 48770 - }, - { - "epoch": 0.43123110380310825, - "grad_norm": 10.160294532775879, - "learning_rate": 4.281281493661486e-05, - "loss": 0.8685, - "step": 48780 - }, - { - "epoch": 0.4313195070634205, - "grad_norm": 2.1390461921691895, - "learning_rate": 4.2811341548943e-05, - "loss": 0.6697, - "step": 48790 - }, - { - "epoch": 0.4314079103237327, - "grad_norm": 5.534269332885742, - "learning_rate": 4.280986816127112e-05, - "loss": 0.6721, - "step": 48800 - }, - { - "epoch": 0.431496313584045, - "grad_norm": 11.164871215820312, - "learning_rate": 4.2808394773599255e-05, - "loss": 0.7006, - "step": 48810 - }, - { - "epoch": 0.43158471684435723, - "grad_norm": 6.385516166687012, - "learning_rate": 4.280692138592738e-05, - "loss": 0.7559, - "step": 48820 - }, - { - "epoch": 0.43167312010466946, - "grad_norm": 7.103679180145264, - "learning_rate": 4.280544799825551e-05, - "loss": 0.5955, - "step": 48830 - }, - { - "epoch": 0.4317615233649817, - "grad_norm": 1.2492018938064575, - "learning_rate": 4.280397461058364e-05, - "loss": 0.4904, - "step": 48840 - }, - { - "epoch": 0.4318499266252939, - "grad_norm": 1.7535746097564697, - "learning_rate": 4.280250122291177e-05, - "loss": 0.7749, - "step": 48850 - }, - { - "epoch": 0.43193832988560615, - "grad_norm": 8.345301628112793, - "learning_rate": 4.28010278352399e-05, - "loss": 0.6524, - "step": 48860 - }, - { - "epoch": 0.43202673314591844, - "grad_norm": 5.098632335662842, - "learning_rate": 4.279955444756803e-05, - "loss": 0.6604, - "step": 48870 - }, - { - "epoch": 0.43211513640623067, - "grad_norm": 11.396991729736328, - "learning_rate": 4.2798081059896153e-05, - "loss": 0.7243, - "step": 48880 - }, - { - "epoch": 0.4322035396665429, - "grad_norm": 3.8040575981140137, - "learning_rate": 4.279660767222429e-05, - "loss": 0.8036, - "step": 48890 - }, - { - "epoch": 0.43229194292685513, - "grad_norm": 7.881913661956787, - "learning_rate": 4.279513428455242e-05, - "loss": 0.6083, - "step": 48900 - }, - { - "epoch": 0.43238034618716736, - "grad_norm": 4.152080059051514, - "learning_rate": 4.2793660896880545e-05, - "loss": 0.8232, - "step": 48910 - }, - { - "epoch": 0.43246874944747965, - "grad_norm": 4.705545902252197, - "learning_rate": 4.2792187509208674e-05, - "loss": 0.8514, - "step": 48920 - }, - { - "epoch": 0.4325571527077919, - "grad_norm": 5.913174152374268, - "learning_rate": 4.279071412153681e-05, - "loss": 0.665, - "step": 48930 - }, - { - "epoch": 0.4326455559681041, - "grad_norm": 4.060830593109131, - "learning_rate": 4.278924073386493e-05, - "loss": 0.7206, - "step": 48940 - }, - { - "epoch": 0.43273395922841634, - "grad_norm": 2.5899369716644287, - "learning_rate": 4.2787767346193066e-05, - "loss": 0.6992, - "step": 48950 - }, - { - "epoch": 0.4328223624887286, - "grad_norm": 5.40007209777832, - "learning_rate": 4.278629395852119e-05, - "loss": 0.6133, - "step": 48960 - }, - { - "epoch": 0.4329107657490408, - "grad_norm": 2.1534299850463867, - "learning_rate": 4.278482057084932e-05, - "loss": 0.7526, - "step": 48970 - }, - { - "epoch": 0.4329991690093531, - "grad_norm": 7.855589389801025, - "learning_rate": 4.278334718317745e-05, - "loss": 0.7828, - "step": 48980 - }, - { - "epoch": 0.4330875722696653, - "grad_norm": 4.2604570388793945, - "learning_rate": 4.278187379550558e-05, - "loss": 0.7934, - "step": 48990 - }, - { - "epoch": 0.43317597552997755, - "grad_norm": 1.6459400653839111, - "learning_rate": 4.278040040783371e-05, - "loss": 0.6258, - "step": 49000 - }, - { - "epoch": 0.4332643787902898, - "grad_norm": 5.904418468475342, - "learning_rate": 4.277892702016184e-05, - "loss": 0.7951, - "step": 49010 - }, - { - "epoch": 0.433352782050602, - "grad_norm": 4.660916328430176, - "learning_rate": 4.2777453632489964e-05, - "loss": 0.7777, - "step": 49020 - }, - { - "epoch": 0.43344118531091425, - "grad_norm": 1.808325171470642, - "learning_rate": 4.27759802448181e-05, - "loss": 0.7035, - "step": 49030 - }, - { - "epoch": 0.43352958857122653, - "grad_norm": 7.488389015197754, - "learning_rate": 4.277450685714623e-05, - "loss": 0.7222, - "step": 49040 - }, - { - "epoch": 0.43361799183153876, - "grad_norm": 2.403414249420166, - "learning_rate": 4.2773033469474356e-05, - "loss": 0.7446, - "step": 49050 - }, - { - "epoch": 0.433706395091851, - "grad_norm": 4.051290512084961, - "learning_rate": 4.2771560081802484e-05, - "loss": 0.6622, - "step": 49060 - }, - { - "epoch": 0.4337947983521632, - "grad_norm": 2.8323144912719727, - "learning_rate": 4.277008669413061e-05, - "loss": 0.6099, - "step": 49070 - }, - { - "epoch": 0.43388320161247546, - "grad_norm": 1.4692400693893433, - "learning_rate": 4.276861330645874e-05, - "loss": 0.6458, - "step": 49080 - }, - { - "epoch": 0.4339716048727877, - "grad_norm": 7.437715530395508, - "learning_rate": 4.2767139918786876e-05, - "loss": 0.7714, - "step": 49090 - }, - { - "epoch": 0.4340600081331, - "grad_norm": 1.789042592048645, - "learning_rate": 4.2765666531115e-05, - "loss": 0.7629, - "step": 49100 - }, - { - "epoch": 0.4341484113934122, - "grad_norm": 9.832332611083984, - "learning_rate": 4.276419314344313e-05, - "loss": 0.8524, - "step": 49110 - }, - { - "epoch": 0.43423681465372443, - "grad_norm": 4.003662109375, - "learning_rate": 4.276271975577126e-05, - "loss": 0.713, - "step": 49120 - }, - { - "epoch": 0.43432521791403667, - "grad_norm": 4.436911106109619, - "learning_rate": 4.276124636809939e-05, - "loss": 0.7281, - "step": 49130 - }, - { - "epoch": 0.4344136211743489, - "grad_norm": 3.5087971687316895, - "learning_rate": 4.275977298042752e-05, - "loss": 0.7506, - "step": 49140 - }, - { - "epoch": 0.4345020244346611, - "grad_norm": 3.611255407333374, - "learning_rate": 4.275829959275565e-05, - "loss": 0.6588, - "step": 49150 - }, - { - "epoch": 0.4345904276949734, - "grad_norm": 8.180458068847656, - "learning_rate": 4.2756826205083775e-05, - "loss": 0.7707, - "step": 49160 - }, - { - "epoch": 0.43467883095528564, - "grad_norm": 6.969361782073975, - "learning_rate": 4.275535281741191e-05, - "loss": 0.7826, - "step": 49170 - }, - { - "epoch": 0.4347672342155979, - "grad_norm": 10.904851913452148, - "learning_rate": 4.275387942974004e-05, - "loss": 0.7563, - "step": 49180 - }, - { - "epoch": 0.4348556374759101, - "grad_norm": 6.551028728485107, - "learning_rate": 4.2752406042068166e-05, - "loss": 0.6823, - "step": 49190 - }, - { - "epoch": 0.43494404073622234, - "grad_norm": 2.8073883056640625, - "learning_rate": 4.2750932654396295e-05, - "loss": 0.7997, - "step": 49200 - }, - { - "epoch": 0.43503244399653457, - "grad_norm": 1.836535930633545, - "learning_rate": 4.274945926672442e-05, - "loss": 0.6028, - "step": 49210 - }, - { - "epoch": 0.43512084725684685, - "grad_norm": 2.6261870861053467, - "learning_rate": 4.274798587905255e-05, - "loss": 0.6354, - "step": 49220 - }, - { - "epoch": 0.4352092505171591, - "grad_norm": 1.8312478065490723, - "learning_rate": 4.2746512491380687e-05, - "loss": 0.7721, - "step": 49230 - }, - { - "epoch": 0.4352976537774713, - "grad_norm": 3.289795160293579, - "learning_rate": 4.2745039103708815e-05, - "loss": 0.7262, - "step": 49240 - }, - { - "epoch": 0.43538605703778355, - "grad_norm": 2.8596582412719727, - "learning_rate": 4.274356571603694e-05, - "loss": 0.6417, - "step": 49250 - }, - { - "epoch": 0.4354744602980958, - "grad_norm": 4.209091663360596, - "learning_rate": 4.274209232836507e-05, - "loss": 0.5637, - "step": 49260 - }, - { - "epoch": 0.435562863558408, - "grad_norm": 3.2064123153686523, - "learning_rate": 4.27406189406932e-05, - "loss": 0.7819, - "step": 49270 - }, - { - "epoch": 0.4356512668187203, - "grad_norm": 3.832395553588867, - "learning_rate": 4.273914555302133e-05, - "loss": 0.8224, - "step": 49280 - }, - { - "epoch": 0.4357396700790325, - "grad_norm": 14.278970718383789, - "learning_rate": 4.2737672165349463e-05, - "loss": 0.8477, - "step": 49290 - }, - { - "epoch": 0.43582807333934476, - "grad_norm": 7.994842052459717, - "learning_rate": 4.273619877767759e-05, - "loss": 0.6582, - "step": 49300 - }, - { - "epoch": 0.435916476599657, - "grad_norm": 3.741403102874756, - "learning_rate": 4.273472539000572e-05, - "loss": 0.6977, - "step": 49310 - }, - { - "epoch": 0.4360048798599692, - "grad_norm": 2.5595083236694336, - "learning_rate": 4.273325200233385e-05, - "loss": 0.7516, - "step": 49320 - }, - { - "epoch": 0.43609328312028145, - "grad_norm": 7.676196575164795, - "learning_rate": 4.273177861466198e-05, - "loss": 0.8734, - "step": 49330 - }, - { - "epoch": 0.43618168638059374, - "grad_norm": 3.580632448196411, - "learning_rate": 4.2730305226990105e-05, - "loss": 0.7638, - "step": 49340 - }, - { - "epoch": 0.43627008964090597, - "grad_norm": 6.204030990600586, - "learning_rate": 4.2728831839318234e-05, - "loss": 0.705, - "step": 49350 - }, - { - "epoch": 0.4363584929012182, - "grad_norm": 2.959484815597534, - "learning_rate": 4.272735845164637e-05, - "loss": 0.7366, - "step": 49360 - }, - { - "epoch": 0.43644689616153043, - "grad_norm": 3.1605892181396484, - "learning_rate": 4.27258850639745e-05, - "loss": 0.7496, - "step": 49370 - }, - { - "epoch": 0.43653529942184266, - "grad_norm": 6.258642196655273, - "learning_rate": 4.2724411676302625e-05, - "loss": 0.6651, - "step": 49380 - }, - { - "epoch": 0.4366237026821549, - "grad_norm": 5.046906471252441, - "learning_rate": 4.2722938288630754e-05, - "loss": 0.8945, - "step": 49390 - }, - { - "epoch": 0.4367121059424672, - "grad_norm": 3.16194224357605, - "learning_rate": 4.272146490095888e-05, - "loss": 0.682, - "step": 49400 - }, - { - "epoch": 0.4368005092027794, - "grad_norm": 3.2964229583740234, - "learning_rate": 4.271999151328701e-05, - "loss": 0.7375, - "step": 49410 - }, - { - "epoch": 0.43688891246309164, - "grad_norm": 2.3401637077331543, - "learning_rate": 4.2718518125615146e-05, - "loss": 0.6115, - "step": 49420 - }, - { - "epoch": 0.43697731572340387, - "grad_norm": 2.230109453201294, - "learning_rate": 4.271704473794327e-05, - "loss": 0.8274, - "step": 49430 - }, - { - "epoch": 0.4370657189837161, - "grad_norm": 6.246984481811523, - "learning_rate": 4.27155713502714e-05, - "loss": 0.6711, - "step": 49440 - }, - { - "epoch": 0.4371541222440284, - "grad_norm": 2.381495475769043, - "learning_rate": 4.271409796259953e-05, - "loss": 0.6404, - "step": 49450 - }, - { - "epoch": 0.4372425255043406, - "grad_norm": 4.55625057220459, - "learning_rate": 4.271262457492766e-05, - "loss": 0.8701, - "step": 49460 - }, - { - "epoch": 0.43733092876465285, - "grad_norm": 1.407307744026184, - "learning_rate": 4.271115118725579e-05, - "loss": 0.78, - "step": 49470 - }, - { - "epoch": 0.4374193320249651, - "grad_norm": 1.2274616956710815, - "learning_rate": 4.270967779958392e-05, - "loss": 0.7828, - "step": 49480 - }, - { - "epoch": 0.4375077352852773, - "grad_norm": 4.4979987144470215, - "learning_rate": 4.2708204411912044e-05, - "loss": 0.7073, - "step": 49490 - }, - { - "epoch": 0.43759613854558954, - "grad_norm": 2.6861391067504883, - "learning_rate": 4.270673102424018e-05, - "loss": 0.7902, - "step": 49500 - }, - { - "epoch": 0.43768454180590183, - "grad_norm": 4.484062194824219, - "learning_rate": 4.270525763656831e-05, - "loss": 0.6514, - "step": 49510 - }, - { - "epoch": 0.43777294506621406, - "grad_norm": 8.268613815307617, - "learning_rate": 4.2703784248896436e-05, - "loss": 0.6924, - "step": 49520 - }, - { - "epoch": 0.4378613483265263, - "grad_norm": 4.025659561157227, - "learning_rate": 4.2702310861224564e-05, - "loss": 0.7731, - "step": 49530 - }, - { - "epoch": 0.4379497515868385, - "grad_norm": 4.77163553237915, - "learning_rate": 4.270083747355269e-05, - "loss": 0.7548, - "step": 49540 - }, - { - "epoch": 0.43803815484715075, - "grad_norm": 4.086894989013672, - "learning_rate": 4.269936408588082e-05, - "loss": 0.7109, - "step": 49550 - }, - { - "epoch": 0.438126558107463, - "grad_norm": 3.181554079055786, - "learning_rate": 4.2697890698208956e-05, - "loss": 0.6558, - "step": 49560 - }, - { - "epoch": 0.43821496136777527, - "grad_norm": 2.358431100845337, - "learning_rate": 4.269641731053708e-05, - "loss": 0.6314, - "step": 49570 - }, - { - "epoch": 0.4383033646280875, - "grad_norm": 2.7088799476623535, - "learning_rate": 4.269494392286521e-05, - "loss": 0.624, - "step": 49580 - }, - { - "epoch": 0.43839176788839973, - "grad_norm": 6.590661525726318, - "learning_rate": 4.269347053519334e-05, - "loss": 0.7938, - "step": 49590 - }, - { - "epoch": 0.43848017114871196, - "grad_norm": 1.3354746103286743, - "learning_rate": 4.269199714752147e-05, - "loss": 0.6071, - "step": 49600 - }, - { - "epoch": 0.4385685744090242, - "grad_norm": 3.4821600914001465, - "learning_rate": 4.26905237598496e-05, - "loss": 0.746, - "step": 49610 - }, - { - "epoch": 0.4386569776693364, - "grad_norm": 2.755751371383667, - "learning_rate": 4.268905037217773e-05, - "loss": 0.6226, - "step": 49620 - }, - { - "epoch": 0.4387453809296487, - "grad_norm": 5.020684242248535, - "learning_rate": 4.2687576984505855e-05, - "loss": 0.8043, - "step": 49630 - }, - { - "epoch": 0.43883378418996094, - "grad_norm": 6.5135674476623535, - "learning_rate": 4.268610359683399e-05, - "loss": 0.7847, - "step": 49640 - }, - { - "epoch": 0.43892218745027317, - "grad_norm": 5.412643909454346, - "learning_rate": 4.268463020916212e-05, - "loss": 0.7045, - "step": 49650 - }, - { - "epoch": 0.4390105907105854, - "grad_norm": 3.9150893688201904, - "learning_rate": 4.2683156821490246e-05, - "loss": 0.7433, - "step": 49660 - }, - { - "epoch": 0.43909899397089763, - "grad_norm": 7.079890251159668, - "learning_rate": 4.2681683433818375e-05, - "loss": 0.6557, - "step": 49670 - }, - { - "epoch": 0.43918739723120986, - "grad_norm": 2.9877278804779053, - "learning_rate": 4.26802100461465e-05, - "loss": 0.7075, - "step": 49680 - }, - { - "epoch": 0.43927580049152215, - "grad_norm": 1.8254339694976807, - "learning_rate": 4.267873665847463e-05, - "loss": 0.609, - "step": 49690 - }, - { - "epoch": 0.4393642037518344, - "grad_norm": 8.803850173950195, - "learning_rate": 4.2677263270802767e-05, - "loss": 0.8693, - "step": 49700 - }, - { - "epoch": 0.4394526070121466, - "grad_norm": 5.286747455596924, - "learning_rate": 4.267578988313089e-05, - "loss": 0.6831, - "step": 49710 - }, - { - "epoch": 0.43954101027245884, - "grad_norm": 4.2131147384643555, - "learning_rate": 4.267431649545902e-05, - "loss": 0.589, - "step": 49720 - }, - { - "epoch": 0.4396294135327711, - "grad_norm": 2.2544007301330566, - "learning_rate": 4.267284310778715e-05, - "loss": 0.6905, - "step": 49730 - }, - { - "epoch": 0.4397178167930833, - "grad_norm": 2.0811386108398438, - "learning_rate": 4.267136972011528e-05, - "loss": 0.722, - "step": 49740 - }, - { - "epoch": 0.4398062200533956, - "grad_norm": 1.8482768535614014, - "learning_rate": 4.266989633244341e-05, - "loss": 0.8377, - "step": 49750 - }, - { - "epoch": 0.4398946233137078, - "grad_norm": 3.3543198108673096, - "learning_rate": 4.2668422944771544e-05, - "loss": 0.7036, - "step": 49760 - }, - { - "epoch": 0.43998302657402005, - "grad_norm": 4.920956134796143, - "learning_rate": 4.2666949557099665e-05, - "loss": 0.7653, - "step": 49770 - }, - { - "epoch": 0.4400714298343323, - "grad_norm": 15.025528907775879, - "learning_rate": 4.26654761694278e-05, - "loss": 0.906, - "step": 49780 - }, - { - "epoch": 0.4401598330946445, - "grad_norm": 6.583856105804443, - "learning_rate": 4.266400278175592e-05, - "loss": 0.6997, - "step": 49790 - }, - { - "epoch": 0.44024823635495675, - "grad_norm": 6.270252227783203, - "learning_rate": 4.266252939408406e-05, - "loss": 0.7197, - "step": 49800 - }, - { - "epoch": 0.44033663961526903, - "grad_norm": 1.6152703762054443, - "learning_rate": 4.2661056006412185e-05, - "loss": 0.6435, - "step": 49810 - }, - { - "epoch": 0.44042504287558126, - "grad_norm": 1.472029209136963, - "learning_rate": 4.2659582618740314e-05, - "loss": 0.6587, - "step": 49820 - }, - { - "epoch": 0.4405134461358935, - "grad_norm": 6.564108371734619, - "learning_rate": 4.265810923106844e-05, - "loss": 0.7238, - "step": 49830 - }, - { - "epoch": 0.4406018493962057, - "grad_norm": 3.803205966949463, - "learning_rate": 4.265663584339658e-05, - "loss": 0.6642, - "step": 49840 - }, - { - "epoch": 0.44069025265651796, - "grad_norm": 2.6269888877868652, - "learning_rate": 4.26551624557247e-05, - "loss": 0.7132, - "step": 49850 - }, - { - "epoch": 0.4407786559168302, - "grad_norm": 4.599565029144287, - "learning_rate": 4.2653689068052834e-05, - "loss": 0.8978, - "step": 49860 - }, - { - "epoch": 0.4408670591771425, - "grad_norm": 8.206945419311523, - "learning_rate": 4.265221568038096e-05, - "loss": 0.7625, - "step": 49870 - }, - { - "epoch": 0.4409554624374547, - "grad_norm": 2.6349050998687744, - "learning_rate": 4.265074229270909e-05, - "loss": 0.7431, - "step": 49880 - }, - { - "epoch": 0.44104386569776693, - "grad_norm": 2.3383517265319824, - "learning_rate": 4.264926890503722e-05, - "loss": 0.6219, - "step": 49890 - }, - { - "epoch": 0.44113226895807917, - "grad_norm": 1.913715124130249, - "learning_rate": 4.264779551736535e-05, - "loss": 0.7326, - "step": 49900 - }, - { - "epoch": 0.4412206722183914, - "grad_norm": 11.836853981018066, - "learning_rate": 4.2646322129693476e-05, - "loss": 0.6723, - "step": 49910 - }, - { - "epoch": 0.4413090754787036, - "grad_norm": 6.696552753448486, - "learning_rate": 4.264484874202161e-05, - "loss": 0.7211, - "step": 49920 - }, - { - "epoch": 0.4413974787390159, - "grad_norm": 2.5942704677581787, - "learning_rate": 4.264337535434973e-05, - "loss": 0.7202, - "step": 49930 - }, - { - "epoch": 0.44148588199932814, - "grad_norm": 5.038695812225342, - "learning_rate": 4.264190196667787e-05, - "loss": 0.7669, - "step": 49940 - }, - { - "epoch": 0.4415742852596404, - "grad_norm": 4.409718036651611, - "learning_rate": 4.2640428579005996e-05, - "loss": 0.7907, - "step": 49950 - }, - { - "epoch": 0.4416626885199526, - "grad_norm": 2.8787426948547363, - "learning_rate": 4.2638955191334124e-05, - "loss": 0.6615, - "step": 49960 - }, - { - "epoch": 0.44175109178026484, - "grad_norm": 2.493668556213379, - "learning_rate": 4.263748180366225e-05, - "loss": 0.7624, - "step": 49970 - }, - { - "epoch": 0.4418394950405771, - "grad_norm": 1.4627968072891235, - "learning_rate": 4.263600841599039e-05, - "loss": 0.6539, - "step": 49980 - }, - { - "epoch": 0.44192789830088935, - "grad_norm": 4.599356174468994, - "learning_rate": 4.263453502831851e-05, - "loss": 0.7396, - "step": 49990 - }, - { - "epoch": 0.4420163015612016, - "grad_norm": 4.267996788024902, - "learning_rate": 4.2633061640646644e-05, - "loss": 0.7359, - "step": 50000 - }, - { - "epoch": 0.4421047048215138, - "grad_norm": 5.884395599365234, - "learning_rate": 4.2631588252974766e-05, - "loss": 0.7275, - "step": 50010 - }, - { - "epoch": 0.44219310808182605, - "grad_norm": 2.460977792739868, - "learning_rate": 4.26301148653029e-05, - "loss": 0.5478, - "step": 50020 - }, - { - "epoch": 0.4422815113421383, - "grad_norm": 5.67397928237915, - "learning_rate": 4.262864147763103e-05, - "loss": 0.6254, - "step": 50030 - }, - { - "epoch": 0.44236991460245056, - "grad_norm": 3.4750771522521973, - "learning_rate": 4.262716808995916e-05, - "loss": 0.7862, - "step": 50040 - }, - { - "epoch": 0.4424583178627628, - "grad_norm": 1.5983625650405884, - "learning_rate": 4.2625694702287286e-05, - "loss": 0.6876, - "step": 50050 - }, - { - "epoch": 0.442546721123075, - "grad_norm": 3.5901355743408203, - "learning_rate": 4.262422131461542e-05, - "loss": 0.7766, - "step": 50060 - }, - { - "epoch": 0.44263512438338726, - "grad_norm": 5.239259719848633, - "learning_rate": 4.262274792694354e-05, - "loss": 0.7626, - "step": 50070 - }, - { - "epoch": 0.4427235276436995, - "grad_norm": 1.8367315530776978, - "learning_rate": 4.262127453927168e-05, - "loss": 0.7592, - "step": 50080 - }, - { - "epoch": 0.4428119309040117, - "grad_norm": 4.611416339874268, - "learning_rate": 4.2619801151599806e-05, - "loss": 0.6571, - "step": 50090 - }, - { - "epoch": 0.442900334164324, - "grad_norm": 5.311549186706543, - "learning_rate": 4.2618327763927935e-05, - "loss": 0.7994, - "step": 50100 - }, - { - "epoch": 0.44298873742463624, - "grad_norm": 7.112986087799072, - "learning_rate": 4.261685437625606e-05, - "loss": 0.6723, - "step": 50110 - }, - { - "epoch": 0.44307714068494847, - "grad_norm": 2.7187156677246094, - "learning_rate": 4.26153809885842e-05, - "loss": 0.7939, - "step": 50120 - }, - { - "epoch": 0.4431655439452607, - "grad_norm": 5.719393730163574, - "learning_rate": 4.261390760091232e-05, - "loss": 0.7802, - "step": 50130 - }, - { - "epoch": 0.44325394720557293, - "grad_norm": 3.5198893547058105, - "learning_rate": 4.2612434213240455e-05, - "loss": 0.636, - "step": 50140 - }, - { - "epoch": 0.44334235046588516, - "grad_norm": 6.148463726043701, - "learning_rate": 4.261096082556858e-05, - "loss": 0.5975, - "step": 50150 - }, - { - "epoch": 0.44343075372619745, - "grad_norm": 1.562666654586792, - "learning_rate": 4.260948743789671e-05, - "loss": 0.7161, - "step": 50160 - }, - { - "epoch": 0.4435191569865097, - "grad_norm": 5.703946113586426, - "learning_rate": 4.260801405022484e-05, - "loss": 0.6523, - "step": 50170 - }, - { - "epoch": 0.4436075602468219, - "grad_norm": 2.8563754558563232, - "learning_rate": 4.260654066255297e-05, - "loss": 0.7661, - "step": 50180 - }, - { - "epoch": 0.44369596350713414, - "grad_norm": 2.9135634899139404, - "learning_rate": 4.2605067274881097e-05, - "loss": 0.8834, - "step": 50190 - }, - { - "epoch": 0.44378436676744637, - "grad_norm": 5.729390621185303, - "learning_rate": 4.260359388720923e-05, - "loss": 0.7242, - "step": 50200 - }, - { - "epoch": 0.4438727700277586, - "grad_norm": 4.321021556854248, - "learning_rate": 4.260212049953736e-05, - "loss": 0.6896, - "step": 50210 - }, - { - "epoch": 0.4439611732880709, - "grad_norm": 4.484278202056885, - "learning_rate": 4.260064711186549e-05, - "loss": 0.6525, - "step": 50220 - }, - { - "epoch": 0.4440495765483831, - "grad_norm": 19.692768096923828, - "learning_rate": 4.259917372419362e-05, - "loss": 0.7327, - "step": 50230 - }, - { - "epoch": 0.44413797980869535, - "grad_norm": 9.12367057800293, - "learning_rate": 4.2597700336521745e-05, - "loss": 0.7455, - "step": 50240 - }, - { - "epoch": 0.4442263830690076, - "grad_norm": 4.884012222290039, - "learning_rate": 4.2596226948849874e-05, - "loss": 0.6688, - "step": 50250 - }, - { - "epoch": 0.4443147863293198, - "grad_norm": 1.6866464614868164, - "learning_rate": 4.2594753561178e-05, - "loss": 0.6921, - "step": 50260 - }, - { - "epoch": 0.44440318958963204, - "grad_norm": 4.648808479309082, - "learning_rate": 4.259328017350614e-05, - "loss": 0.6725, - "step": 50270 - }, - { - "epoch": 0.44449159284994433, - "grad_norm": 6.333119869232178, - "learning_rate": 4.2591806785834265e-05, - "loss": 0.793, - "step": 50280 - }, - { - "epoch": 0.44457999611025656, - "grad_norm": 2.2390098571777344, - "learning_rate": 4.2590333398162394e-05, - "loss": 0.7509, - "step": 50290 - }, - { - "epoch": 0.4446683993705688, - "grad_norm": 8.741256713867188, - "learning_rate": 4.258886001049052e-05, - "loss": 0.7244, - "step": 50300 - }, - { - "epoch": 0.444756802630881, - "grad_norm": 2.243675947189331, - "learning_rate": 4.258738662281865e-05, - "loss": 0.7969, - "step": 50310 - }, - { - "epoch": 0.44484520589119325, - "grad_norm": 3.3647618293762207, - "learning_rate": 4.258591323514678e-05, - "loss": 0.6842, - "step": 50320 - }, - { - "epoch": 0.4449336091515055, - "grad_norm": 4.149233341217041, - "learning_rate": 4.2584439847474914e-05, - "loss": 0.736, - "step": 50330 - }, - { - "epoch": 0.44502201241181777, - "grad_norm": 1.2286020517349243, - "learning_rate": 4.258296645980304e-05, - "loss": 0.7811, - "step": 50340 - }, - { - "epoch": 0.44511041567213, - "grad_norm": 4.393327236175537, - "learning_rate": 4.258149307213117e-05, - "loss": 0.7214, - "step": 50350 - }, - { - "epoch": 0.44519881893244223, - "grad_norm": 7.338047027587891, - "learning_rate": 4.25800196844593e-05, - "loss": 0.7202, - "step": 50360 - }, - { - "epoch": 0.44528722219275446, - "grad_norm": 2.7075812816619873, - "learning_rate": 4.257854629678743e-05, - "loss": 0.6667, - "step": 50370 - }, - { - "epoch": 0.4453756254530667, - "grad_norm": 4.2327656745910645, - "learning_rate": 4.2577072909115556e-05, - "loss": 0.7128, - "step": 50380 - }, - { - "epoch": 0.4454640287133789, - "grad_norm": 2.7099413871765137, - "learning_rate": 4.257559952144369e-05, - "loss": 0.5684, - "step": 50390 - }, - { - "epoch": 0.4455524319736912, - "grad_norm": 3.604851245880127, - "learning_rate": 4.257412613377181e-05, - "loss": 0.6998, - "step": 50400 - }, - { - "epoch": 0.44564083523400344, - "grad_norm": 3.393521785736084, - "learning_rate": 4.257265274609995e-05, - "loss": 0.679, - "step": 50410 - }, - { - "epoch": 0.44572923849431567, - "grad_norm": 3.353628635406494, - "learning_rate": 4.2571179358428076e-05, - "loss": 0.586, - "step": 50420 - }, - { - "epoch": 0.4458176417546279, - "grad_norm": 3.7172305583953857, - "learning_rate": 4.2569705970756204e-05, - "loss": 0.8801, - "step": 50430 - }, - { - "epoch": 0.44590604501494013, - "grad_norm": 1.9572820663452148, - "learning_rate": 4.256823258308433e-05, - "loss": 0.7879, - "step": 50440 - }, - { - "epoch": 0.44599444827525236, - "grad_norm": 3.830937385559082, - "learning_rate": 4.256675919541247e-05, - "loss": 0.6682, - "step": 50450 - }, - { - "epoch": 0.44608285153556465, - "grad_norm": 6.2671074867248535, - "learning_rate": 4.256528580774059e-05, - "loss": 0.6641, - "step": 50460 - }, - { - "epoch": 0.4461712547958769, - "grad_norm": 3.6998579502105713, - "learning_rate": 4.2563812420068724e-05, - "loss": 0.6657, - "step": 50470 - }, - { - "epoch": 0.4462596580561891, - "grad_norm": 2.041537284851074, - "learning_rate": 4.256233903239685e-05, - "loss": 0.629, - "step": 50480 - }, - { - "epoch": 0.44634806131650134, - "grad_norm": 14.65803050994873, - "learning_rate": 4.256086564472498e-05, - "loss": 0.6656, - "step": 50490 - }, - { - "epoch": 0.4464364645768136, - "grad_norm": 7.739551067352295, - "learning_rate": 4.255939225705311e-05, - "loss": 0.8985, - "step": 50500 - }, - { - "epoch": 0.44652486783712586, - "grad_norm": 3.0493173599243164, - "learning_rate": 4.255791886938124e-05, - "loss": 0.7058, - "step": 50510 - }, - { - "epoch": 0.4466132710974381, - "grad_norm": 7.795433044433594, - "learning_rate": 4.2556445481709366e-05, - "loss": 0.7278, - "step": 50520 - }, - { - "epoch": 0.4467016743577503, - "grad_norm": 2.150282859802246, - "learning_rate": 4.25549720940375e-05, - "loss": 0.7359, - "step": 50530 - }, - { - "epoch": 0.44679007761806255, - "grad_norm": 3.0631790161132812, - "learning_rate": 4.255349870636562e-05, - "loss": 0.7116, - "step": 50540 - }, - { - "epoch": 0.4468784808783748, - "grad_norm": 3.001638174057007, - "learning_rate": 4.255202531869376e-05, - "loss": 0.7833, - "step": 50550 - }, - { - "epoch": 0.446966884138687, - "grad_norm": 3.490363121032715, - "learning_rate": 4.2550551931021886e-05, - "loss": 0.7498, - "step": 50560 - }, - { - "epoch": 0.4470552873989993, - "grad_norm": 1.8131797313690186, - "learning_rate": 4.2549078543350015e-05, - "loss": 0.6677, - "step": 50570 - }, - { - "epoch": 0.44714369065931153, - "grad_norm": 2.176320791244507, - "learning_rate": 4.254760515567814e-05, - "loss": 0.7848, - "step": 50580 - }, - { - "epoch": 0.44723209391962376, - "grad_norm": 2.8201351165771484, - "learning_rate": 4.254613176800628e-05, - "loss": 0.6118, - "step": 50590 - }, - { - "epoch": 0.447320497179936, - "grad_norm": 7.577088832855225, - "learning_rate": 4.25446583803344e-05, - "loss": 0.6237, - "step": 50600 - }, - { - "epoch": 0.4474089004402482, - "grad_norm": 4.768428325653076, - "learning_rate": 4.2543184992662535e-05, - "loss": 0.7648, - "step": 50610 - }, - { - "epoch": 0.44749730370056046, - "grad_norm": 3.6399502754211426, - "learning_rate": 4.2541711604990656e-05, - "loss": 0.659, - "step": 50620 - }, - { - "epoch": 0.44758570696087274, - "grad_norm": 4.284818172454834, - "learning_rate": 4.254023821731879e-05, - "loss": 0.8383, - "step": 50630 - }, - { - "epoch": 0.447674110221185, - "grad_norm": 4.566434860229492, - "learning_rate": 4.253876482964692e-05, - "loss": 0.6057, - "step": 50640 - }, - { - "epoch": 0.4477625134814972, - "grad_norm": 5.5485615730285645, - "learning_rate": 4.253729144197505e-05, - "loss": 0.7449, - "step": 50650 - }, - { - "epoch": 0.44785091674180943, - "grad_norm": 1.481044888496399, - "learning_rate": 4.253581805430318e-05, - "loss": 0.7224, - "step": 50660 - }, - { - "epoch": 0.44793932000212167, - "grad_norm": 1.7246774435043335, - "learning_rate": 4.253434466663131e-05, - "loss": 0.608, - "step": 50670 - }, - { - "epoch": 0.4480277232624339, - "grad_norm": 5.615640640258789, - "learning_rate": 4.253287127895943e-05, - "loss": 0.7658, - "step": 50680 - }, - { - "epoch": 0.4481161265227462, - "grad_norm": 5.9075188636779785, - "learning_rate": 4.253139789128757e-05, - "loss": 0.8186, - "step": 50690 - }, - { - "epoch": 0.4482045297830584, - "grad_norm": 4.552731037139893, - "learning_rate": 4.25299245036157e-05, - "loss": 0.773, - "step": 50700 - }, - { - "epoch": 0.44829293304337064, - "grad_norm": 2.067314624786377, - "learning_rate": 4.2528451115943825e-05, - "loss": 0.6254, - "step": 50710 - }, - { - "epoch": 0.4483813363036829, - "grad_norm": 5.75351619720459, - "learning_rate": 4.2526977728271954e-05, - "loss": 0.6519, - "step": 50720 - }, - { - "epoch": 0.4484697395639951, - "grad_norm": 1.9102205038070679, - "learning_rate": 4.252550434060008e-05, - "loss": 0.7194, - "step": 50730 - }, - { - "epoch": 0.44855814282430734, - "grad_norm": 2.0806591510772705, - "learning_rate": 4.252403095292821e-05, - "loss": 0.729, - "step": 50740 - }, - { - "epoch": 0.4486465460846196, - "grad_norm": 2.3463573455810547, - "learning_rate": 4.2522557565256345e-05, - "loss": 0.7261, - "step": 50750 - }, - { - "epoch": 0.44873494934493185, - "grad_norm": 1.6279937028884888, - "learning_rate": 4.252108417758447e-05, - "loss": 0.8222, - "step": 50760 - }, - { - "epoch": 0.4488233526052441, - "grad_norm": 5.521528720855713, - "learning_rate": 4.25196107899126e-05, - "loss": 0.8362, - "step": 50770 - }, - { - "epoch": 0.4489117558655563, - "grad_norm": 3.009153127670288, - "learning_rate": 4.251813740224073e-05, - "loss": 0.7793, - "step": 50780 - }, - { - "epoch": 0.44900015912586855, - "grad_norm": 4.9164652824401855, - "learning_rate": 4.251666401456886e-05, - "loss": 0.7881, - "step": 50790 - }, - { - "epoch": 0.4490885623861808, - "grad_norm": 5.745452880859375, - "learning_rate": 4.251519062689699e-05, - "loss": 0.5978, - "step": 50800 - }, - { - "epoch": 0.44917696564649306, - "grad_norm": 2.97101092338562, - "learning_rate": 4.251371723922512e-05, - "loss": 0.7987, - "step": 50810 - }, - { - "epoch": 0.4492653689068053, - "grad_norm": 3.5567069053649902, - "learning_rate": 4.2512243851553244e-05, - "loss": 0.7784, - "step": 50820 - }, - { - "epoch": 0.4493537721671175, - "grad_norm": 7.108294486999512, - "learning_rate": 4.251077046388138e-05, - "loss": 0.7194, - "step": 50830 - }, - { - "epoch": 0.44944217542742976, - "grad_norm": 6.408024311065674, - "learning_rate": 4.25092970762095e-05, - "loss": 0.7198, - "step": 50840 - }, - { - "epoch": 0.449530578687742, - "grad_norm": 1.4201781749725342, - "learning_rate": 4.2507823688537636e-05, - "loss": 0.7156, - "step": 50850 - }, - { - "epoch": 0.4496189819480542, - "grad_norm": 5.604401111602783, - "learning_rate": 4.2506350300865764e-05, - "loss": 0.713, - "step": 50860 - }, - { - "epoch": 0.4497073852083665, - "grad_norm": 5.523240566253662, - "learning_rate": 4.250487691319389e-05, - "loss": 0.705, - "step": 50870 - }, - { - "epoch": 0.44979578846867874, - "grad_norm": 3.3463923931121826, - "learning_rate": 4.250340352552202e-05, - "loss": 0.7345, - "step": 50880 - }, - { - "epoch": 0.44988419172899097, - "grad_norm": 2.3162336349487305, - "learning_rate": 4.2501930137850156e-05, - "loss": 0.7248, - "step": 50890 - }, - { - "epoch": 0.4499725949893032, - "grad_norm": 1.6102399826049805, - "learning_rate": 4.250045675017828e-05, - "loss": 0.627, - "step": 50900 - }, - { - "epoch": 0.45006099824961543, - "grad_norm": 3.2316250801086426, - "learning_rate": 4.249898336250641e-05, - "loss": 0.8308, - "step": 50910 - }, - { - "epoch": 0.45014940150992766, - "grad_norm": 2.5728564262390137, - "learning_rate": 4.249750997483454e-05, - "loss": 0.8771, - "step": 50920 - }, - { - "epoch": 0.45023780477023995, - "grad_norm": 1.5083261728286743, - "learning_rate": 4.249603658716267e-05, - "loss": 0.71, - "step": 50930 - }, - { - "epoch": 0.4503262080305522, - "grad_norm": 11.411937713623047, - "learning_rate": 4.24945631994908e-05, - "loss": 0.7299, - "step": 50940 - }, - { - "epoch": 0.4504146112908644, - "grad_norm": 4.420965671539307, - "learning_rate": 4.249308981181893e-05, - "loss": 0.7322, - "step": 50950 - }, - { - "epoch": 0.45050301455117664, - "grad_norm": 7.932520389556885, - "learning_rate": 4.2491616424147054e-05, - "loss": 0.7061, - "step": 50960 - }, - { - "epoch": 0.45059141781148887, - "grad_norm": 4.018369197845459, - "learning_rate": 4.249014303647519e-05, - "loss": 0.717, - "step": 50970 - }, - { - "epoch": 0.4506798210718011, - "grad_norm": 4.696685314178467, - "learning_rate": 4.248866964880331e-05, - "loss": 0.7478, - "step": 50980 - }, - { - "epoch": 0.4507682243321134, - "grad_norm": 6.151251792907715, - "learning_rate": 4.2487196261131446e-05, - "loss": 0.7025, - "step": 50990 - }, - { - "epoch": 0.4508566275924256, - "grad_norm": 2.805976152420044, - "learning_rate": 4.2485722873459575e-05, - "loss": 0.6407, - "step": 51000 - }, - { - "epoch": 0.45094503085273785, - "grad_norm": 11.724607467651367, - "learning_rate": 4.24842494857877e-05, - "loss": 0.766, - "step": 51010 - }, - { - "epoch": 0.4510334341130501, - "grad_norm": 3.093700885772705, - "learning_rate": 4.248277609811583e-05, - "loss": 0.6129, - "step": 51020 - }, - { - "epoch": 0.4511218373733623, - "grad_norm": 6.869553565979004, - "learning_rate": 4.2481302710443966e-05, - "loss": 0.7046, - "step": 51030 - }, - { - "epoch": 0.4512102406336746, - "grad_norm": 7.355849266052246, - "learning_rate": 4.247982932277209e-05, - "loss": 0.7978, - "step": 51040 - }, - { - "epoch": 0.45129864389398683, - "grad_norm": 3.561415195465088, - "learning_rate": 4.247835593510022e-05, - "loss": 0.6356, - "step": 51050 - }, - { - "epoch": 0.45138704715429906, - "grad_norm": 4.891472816467285, - "learning_rate": 4.247688254742835e-05, - "loss": 0.6888, - "step": 51060 - }, - { - "epoch": 0.4514754504146113, - "grad_norm": 4.600146770477295, - "learning_rate": 4.247540915975648e-05, - "loss": 0.6876, - "step": 51070 - }, - { - "epoch": 0.4515638536749235, - "grad_norm": 3.3943684101104736, - "learning_rate": 4.247393577208461e-05, - "loss": 0.6932, - "step": 51080 - }, - { - "epoch": 0.45165225693523575, - "grad_norm": 6.3659586906433105, - "learning_rate": 4.2472462384412737e-05, - "loss": 0.8293, - "step": 51090 - }, - { - "epoch": 0.45174066019554804, - "grad_norm": 2.781177043914795, - "learning_rate": 4.2470988996740865e-05, - "loss": 0.8964, - "step": 51100 - }, - { - "epoch": 0.45182906345586027, - "grad_norm": 2.336111068725586, - "learning_rate": 4.2469515609069e-05, - "loss": 0.8381, - "step": 51110 - }, - { - "epoch": 0.4519174667161725, - "grad_norm": 4.049881458282471, - "learning_rate": 4.246804222139713e-05, - "loss": 0.7464, - "step": 51120 - }, - { - "epoch": 0.45200586997648473, - "grad_norm": 3.050784111022949, - "learning_rate": 4.246656883372526e-05, - "loss": 0.7435, - "step": 51130 - }, - { - "epoch": 0.45209427323679696, - "grad_norm": 1.7821450233459473, - "learning_rate": 4.2465095446053385e-05, - "loss": 0.6286, - "step": 51140 - }, - { - "epoch": 0.4521826764971092, - "grad_norm": 1.96043860912323, - "learning_rate": 4.2463622058381513e-05, - "loss": 0.832, - "step": 51150 - }, - { - "epoch": 0.4522710797574215, - "grad_norm": 4.979556083679199, - "learning_rate": 4.246214867070964e-05, - "loss": 0.5888, - "step": 51160 - }, - { - "epoch": 0.4523594830177337, - "grad_norm": 4.7774858474731445, - "learning_rate": 4.246067528303778e-05, - "loss": 0.7634, - "step": 51170 - }, - { - "epoch": 0.45244788627804594, - "grad_norm": 7.052032470703125, - "learning_rate": 4.2459201895365905e-05, - "loss": 0.7601, - "step": 51180 - }, - { - "epoch": 0.45253628953835817, - "grad_norm": 2.726879119873047, - "learning_rate": 4.2457728507694034e-05, - "loss": 0.826, - "step": 51190 - }, - { - "epoch": 0.4526246927986704, - "grad_norm": 5.2360758781433105, - "learning_rate": 4.245625512002216e-05, - "loss": 0.5117, - "step": 51200 - }, - { - "epoch": 0.45271309605898263, - "grad_norm": 2.565112829208374, - "learning_rate": 4.245478173235029e-05, - "loss": 0.7631, - "step": 51210 - }, - { - "epoch": 0.4528014993192949, - "grad_norm": 1.7049453258514404, - "learning_rate": 4.245330834467842e-05, - "loss": 0.6309, - "step": 51220 - }, - { - "epoch": 0.45288990257960715, - "grad_norm": 4.346341609954834, - "learning_rate": 4.245183495700655e-05, - "loss": 0.8687, - "step": 51230 - }, - { - "epoch": 0.4529783058399194, - "grad_norm": 7.231688022613525, - "learning_rate": 4.245036156933468e-05, - "loss": 0.6993, - "step": 51240 - }, - { - "epoch": 0.4530667091002316, - "grad_norm": 5.763049602508545, - "learning_rate": 4.244888818166281e-05, - "loss": 0.7153, - "step": 51250 - }, - { - "epoch": 0.45315511236054384, - "grad_norm": 2.694974899291992, - "learning_rate": 4.244741479399094e-05, - "loss": 0.6416, - "step": 51260 - }, - { - "epoch": 0.4532435156208561, - "grad_norm": 11.770453453063965, - "learning_rate": 4.244594140631907e-05, - "loss": 0.7204, - "step": 51270 - }, - { - "epoch": 0.45333191888116836, - "grad_norm": 1.7721461057662964, - "learning_rate": 4.2444468018647196e-05, - "loss": 0.6159, - "step": 51280 - }, - { - "epoch": 0.4534203221414806, - "grad_norm": 1.4127252101898193, - "learning_rate": 4.2442994630975324e-05, - "loss": 0.6948, - "step": 51290 - }, - { - "epoch": 0.4535087254017928, - "grad_norm": 3.8951640129089355, - "learning_rate": 4.244152124330346e-05, - "loss": 0.711, - "step": 51300 - }, - { - "epoch": 0.45359712866210505, - "grad_norm": 4.719071388244629, - "learning_rate": 4.244004785563158e-05, - "loss": 0.6654, - "step": 51310 - }, - { - "epoch": 0.4536855319224173, - "grad_norm": 1.3236104249954224, - "learning_rate": 4.2438574467959716e-05, - "loss": 0.7087, - "step": 51320 - }, - { - "epoch": 0.4537739351827295, - "grad_norm": 3.448472499847412, - "learning_rate": 4.2437101080287844e-05, - "loss": 0.6766, - "step": 51330 - }, - { - "epoch": 0.4538623384430418, - "grad_norm": 1.7239394187927246, - "learning_rate": 4.243562769261597e-05, - "loss": 0.572, - "step": 51340 - }, - { - "epoch": 0.45395074170335403, - "grad_norm": 9.879199028015137, - "learning_rate": 4.24341543049441e-05, - "loss": 0.7762, - "step": 51350 - }, - { - "epoch": 0.45403914496366626, - "grad_norm": 9.45833683013916, - "learning_rate": 4.2432680917272236e-05, - "loss": 0.7796, - "step": 51360 - }, - { - "epoch": 0.4541275482239785, - "grad_norm": 1.6749019622802734, - "learning_rate": 4.243120752960036e-05, - "loss": 0.7972, - "step": 51370 - }, - { - "epoch": 0.4542159514842907, - "grad_norm": 1.6194690465927124, - "learning_rate": 4.242973414192849e-05, - "loss": 0.8425, - "step": 51380 - }, - { - "epoch": 0.45430435474460296, - "grad_norm": 3.211587905883789, - "learning_rate": 4.242826075425662e-05, - "loss": 0.6692, - "step": 51390 - }, - { - "epoch": 0.45439275800491524, - "grad_norm": 4.525156497955322, - "learning_rate": 4.242678736658475e-05, - "loss": 0.7135, - "step": 51400 - }, - { - "epoch": 0.4544811612652275, - "grad_norm": 1.4770921468734741, - "learning_rate": 4.242531397891288e-05, - "loss": 0.6965, - "step": 51410 - }, - { - "epoch": 0.4545695645255397, - "grad_norm": 6.5885009765625, - "learning_rate": 4.242384059124101e-05, - "loss": 0.7756, - "step": 51420 - }, - { - "epoch": 0.45465796778585194, - "grad_norm": 8.741536140441895, - "learning_rate": 4.2422367203569134e-05, - "loss": 0.7028, - "step": 51430 - }, - { - "epoch": 0.45474637104616417, - "grad_norm": 2.4928131103515625, - "learning_rate": 4.242089381589727e-05, - "loss": 0.7567, - "step": 51440 - }, - { - "epoch": 0.4548347743064764, - "grad_norm": 4.189772605895996, - "learning_rate": 4.241942042822539e-05, - "loss": 0.5896, - "step": 51450 - }, - { - "epoch": 0.4549231775667887, - "grad_norm": 5.977318286895752, - "learning_rate": 4.2417947040553526e-05, - "loss": 0.6195, - "step": 51460 - }, - { - "epoch": 0.4550115808271009, - "grad_norm": 8.101325035095215, - "learning_rate": 4.2416473652881655e-05, - "loss": 0.7467, - "step": 51470 - }, - { - "epoch": 0.45509998408741315, - "grad_norm": 5.809628486633301, - "learning_rate": 4.241500026520978e-05, - "loss": 0.6599, - "step": 51480 - }, - { - "epoch": 0.4551883873477254, - "grad_norm": 6.189295768737793, - "learning_rate": 4.241352687753791e-05, - "loss": 0.5845, - "step": 51490 - }, - { - "epoch": 0.4552767906080376, - "grad_norm": 1.7385181188583374, - "learning_rate": 4.2412053489866047e-05, - "loss": 0.5603, - "step": 51500 - }, - { - "epoch": 0.45536519386834984, - "grad_norm": 3.193965196609497, - "learning_rate": 4.241058010219417e-05, - "loss": 0.6597, - "step": 51510 - }, - { - "epoch": 0.4554535971286621, - "grad_norm": 4.2179107666015625, - "learning_rate": 4.24091067145223e-05, - "loss": 0.7087, - "step": 51520 - }, - { - "epoch": 0.45554200038897436, - "grad_norm": 2.7894127368927, - "learning_rate": 4.240763332685043e-05, - "loss": 0.6678, - "step": 51530 - }, - { - "epoch": 0.4556304036492866, - "grad_norm": 5.392573356628418, - "learning_rate": 4.240615993917856e-05, - "loss": 0.5749, - "step": 51540 - }, - { - "epoch": 0.4557188069095988, - "grad_norm": 2.8715810775756836, - "learning_rate": 4.240468655150669e-05, - "loss": 0.7528, - "step": 51550 - }, - { - "epoch": 0.45580721016991105, - "grad_norm": 7.369666576385498, - "learning_rate": 4.240321316383482e-05, - "loss": 0.6643, - "step": 51560 - }, - { - "epoch": 0.45589561343022333, - "grad_norm": 2.1460392475128174, - "learning_rate": 4.2401739776162945e-05, - "loss": 0.6124, - "step": 51570 - }, - { - "epoch": 0.45598401669053557, - "grad_norm": 2.804109811782837, - "learning_rate": 4.240026638849108e-05, - "loss": 0.7637, - "step": 51580 - }, - { - "epoch": 0.4560724199508478, - "grad_norm": 5.18781042098999, - "learning_rate": 4.23987930008192e-05, - "loss": 0.7274, - "step": 51590 - }, - { - "epoch": 0.45616082321116, - "grad_norm": 7.195793151855469, - "learning_rate": 4.239731961314734e-05, - "loss": 0.7772, - "step": 51600 - }, - { - "epoch": 0.45624922647147226, - "grad_norm": 10.898725509643555, - "learning_rate": 4.2395846225475465e-05, - "loss": 0.699, - "step": 51610 - }, - { - "epoch": 0.4563376297317845, - "grad_norm": 3.8482308387756348, - "learning_rate": 4.2394372837803594e-05, - "loss": 0.6364, - "step": 51620 - }, - { - "epoch": 0.4564260329920968, - "grad_norm": 4.435975074768066, - "learning_rate": 4.239289945013172e-05, - "loss": 0.7792, - "step": 51630 - }, - { - "epoch": 0.456514436252409, - "grad_norm": 10.318117141723633, - "learning_rate": 4.239142606245986e-05, - "loss": 0.7525, - "step": 51640 - }, - { - "epoch": 0.45660283951272124, - "grad_norm": 1.6074044704437256, - "learning_rate": 4.238995267478798e-05, - "loss": 0.7726, - "step": 51650 - }, - { - "epoch": 0.45669124277303347, - "grad_norm": 4.462329864501953, - "learning_rate": 4.2388479287116114e-05, - "loss": 0.489, - "step": 51660 - }, - { - "epoch": 0.4567796460333457, - "grad_norm": 3.9491384029388428, - "learning_rate": 4.2387005899444235e-05, - "loss": 0.6887, - "step": 51670 - }, - { - "epoch": 0.45686804929365793, - "grad_norm": 5.480148792266846, - "learning_rate": 4.238553251177237e-05, - "loss": 0.7327, - "step": 51680 - }, - { - "epoch": 0.4569564525539702, - "grad_norm": 9.441926002502441, - "learning_rate": 4.23840591241005e-05, - "loss": 0.7712, - "step": 51690 - }, - { - "epoch": 0.45704485581428245, - "grad_norm": 7.183199882507324, - "learning_rate": 4.238258573642863e-05, - "loss": 0.7392, - "step": 51700 - }, - { - "epoch": 0.4571332590745947, - "grad_norm": 7.0118560791015625, - "learning_rate": 4.2381112348756755e-05, - "loss": 0.7721, - "step": 51710 - }, - { - "epoch": 0.4572216623349069, - "grad_norm": 6.411294460296631, - "learning_rate": 4.237963896108489e-05, - "loss": 0.6004, - "step": 51720 - }, - { - "epoch": 0.45731006559521914, - "grad_norm": 11.087834358215332, - "learning_rate": 4.237816557341301e-05, - "loss": 0.7065, - "step": 51730 - }, - { - "epoch": 0.45739846885553137, - "grad_norm": 3.378361701965332, - "learning_rate": 4.237669218574115e-05, - "loss": 0.8234, - "step": 51740 - }, - { - "epoch": 0.45748687211584366, - "grad_norm": 6.5096893310546875, - "learning_rate": 4.2375218798069276e-05, - "loss": 0.5817, - "step": 51750 - }, - { - "epoch": 0.4575752753761559, - "grad_norm": 2.6013710498809814, - "learning_rate": 4.2373745410397404e-05, - "loss": 0.7469, - "step": 51760 - }, - { - "epoch": 0.4576636786364681, - "grad_norm": 6.895487308502197, - "learning_rate": 4.237227202272553e-05, - "loss": 0.7929, - "step": 51770 - }, - { - "epoch": 0.45775208189678035, - "grad_norm": 7.680325031280518, - "learning_rate": 4.237079863505366e-05, - "loss": 0.7848, - "step": 51780 - }, - { - "epoch": 0.4578404851570926, - "grad_norm": 2.104198455810547, - "learning_rate": 4.236932524738179e-05, - "loss": 0.7355, - "step": 51790 - }, - { - "epoch": 0.4579288884174048, - "grad_norm": 2.9252047538757324, - "learning_rate": 4.2367851859709924e-05, - "loss": 0.7322, - "step": 51800 - }, - { - "epoch": 0.4580172916777171, - "grad_norm": 3.3038887977600098, - "learning_rate": 4.2366378472038046e-05, - "loss": 0.6402, - "step": 51810 - }, - { - "epoch": 0.45810569493802933, - "grad_norm": 3.781679153442383, - "learning_rate": 4.236490508436618e-05, - "loss": 0.6917, - "step": 51820 - }, - { - "epoch": 0.45819409819834156, - "grad_norm": 4.439880847930908, - "learning_rate": 4.236343169669431e-05, - "loss": 0.562, - "step": 51830 - }, - { - "epoch": 0.4582825014586538, - "grad_norm": 3.3872768878936768, - "learning_rate": 4.236195830902244e-05, - "loss": 0.8094, - "step": 51840 - }, - { - "epoch": 0.458370904718966, - "grad_norm": 5.398439884185791, - "learning_rate": 4.2360484921350566e-05, - "loss": 0.8208, - "step": 51850 - }, - { - "epoch": 0.45845930797927825, - "grad_norm": 4.339428901672363, - "learning_rate": 4.23590115336787e-05, - "loss": 0.6078, - "step": 51860 - }, - { - "epoch": 0.45854771123959054, - "grad_norm": 1.3413759469985962, - "learning_rate": 4.235753814600682e-05, - "loss": 0.789, - "step": 51870 - }, - { - "epoch": 0.45863611449990277, - "grad_norm": 4.0492353439331055, - "learning_rate": 4.235606475833496e-05, - "loss": 0.6432, - "step": 51880 - }, - { - "epoch": 0.458724517760215, - "grad_norm": 1.8701279163360596, - "learning_rate": 4.2354591370663086e-05, - "loss": 0.6345, - "step": 51890 - }, - { - "epoch": 0.45881292102052723, - "grad_norm": 4.309274196624756, - "learning_rate": 4.2353117982991215e-05, - "loss": 0.5966, - "step": 51900 - }, - { - "epoch": 0.45890132428083946, - "grad_norm": 2.160752058029175, - "learning_rate": 4.235164459531934e-05, - "loss": 0.6225, - "step": 51910 - }, - { - "epoch": 0.4589897275411517, - "grad_norm": 2.036487340927124, - "learning_rate": 4.235017120764747e-05, - "loss": 0.6836, - "step": 51920 - }, - { - "epoch": 0.459078130801464, - "grad_norm": 13.578177452087402, - "learning_rate": 4.23486978199756e-05, - "loss": 0.6748, - "step": 51930 - }, - { - "epoch": 0.4591665340617762, - "grad_norm": 3.629070281982422, - "learning_rate": 4.2347224432303735e-05, - "loss": 0.8608, - "step": 51940 - }, - { - "epoch": 0.45925493732208844, - "grad_norm": 4.116369247436523, - "learning_rate": 4.2345751044631856e-05, - "loss": 0.7195, - "step": 51950 - }, - { - "epoch": 0.45934334058240067, - "grad_norm": 6.127593994140625, - "learning_rate": 4.234427765695999e-05, - "loss": 0.7316, - "step": 51960 - }, - { - "epoch": 0.4594317438427129, - "grad_norm": 2.790940284729004, - "learning_rate": 4.234280426928812e-05, - "loss": 0.7933, - "step": 51970 - }, - { - "epoch": 0.45952014710302513, - "grad_norm": 1.463698387145996, - "learning_rate": 4.234133088161625e-05, - "loss": 0.5768, - "step": 51980 - }, - { - "epoch": 0.4596085503633374, - "grad_norm": 3.5501725673675537, - "learning_rate": 4.2339857493944377e-05, - "loss": 0.743, - "step": 51990 - }, - { - "epoch": 0.45969695362364965, - "grad_norm": 1.4623552560806274, - "learning_rate": 4.233838410627251e-05, - "loss": 0.6944, - "step": 52000 - }, - { - "epoch": 0.4597853568839619, - "grad_norm": 5.345694541931152, - "learning_rate": 4.233691071860063e-05, - "loss": 0.7163, - "step": 52010 - }, - { - "epoch": 0.4598737601442741, - "grad_norm": 1.3032469749450684, - "learning_rate": 4.233543733092877e-05, - "loss": 0.6934, - "step": 52020 - }, - { - "epoch": 0.45996216340458634, - "grad_norm": 2.792205810546875, - "learning_rate": 4.23339639432569e-05, - "loss": 0.7318, - "step": 52030 - }, - { - "epoch": 0.4600505666648986, - "grad_norm": 5.106879711151123, - "learning_rate": 4.2332490555585025e-05, - "loss": 0.7482, - "step": 52040 - }, - { - "epoch": 0.46013896992521086, - "grad_norm": 3.861652374267578, - "learning_rate": 4.2331017167913153e-05, - "loss": 0.7403, - "step": 52050 - }, - { - "epoch": 0.4602273731855231, - "grad_norm": 2.3954854011535645, - "learning_rate": 4.232954378024128e-05, - "loss": 0.6757, - "step": 52060 - }, - { - "epoch": 0.4603157764458353, - "grad_norm": 7.476187229156494, - "learning_rate": 4.232807039256941e-05, - "loss": 0.8295, - "step": 52070 - }, - { - "epoch": 0.46040417970614755, - "grad_norm": 3.2276484966278076, - "learning_rate": 4.2326597004897545e-05, - "loss": 0.7112, - "step": 52080 - }, - { - "epoch": 0.4604925829664598, - "grad_norm": 5.918450832366943, - "learning_rate": 4.2325123617225674e-05, - "loss": 0.6645, - "step": 52090 - }, - { - "epoch": 0.460580986226772, - "grad_norm": 1.5007085800170898, - "learning_rate": 4.23236502295538e-05, - "loss": 0.7499, - "step": 52100 - }, - { - "epoch": 0.4606693894870843, - "grad_norm": 2.5571868419647217, - "learning_rate": 4.232217684188193e-05, - "loss": 0.7393, - "step": 52110 - }, - { - "epoch": 0.46075779274739653, - "grad_norm": 4.453342437744141, - "learning_rate": 4.232070345421006e-05, - "loss": 0.7919, - "step": 52120 - }, - { - "epoch": 0.46084619600770876, - "grad_norm": 8.468208312988281, - "learning_rate": 4.231923006653819e-05, - "loss": 0.6137, - "step": 52130 - }, - { - "epoch": 0.460934599268021, - "grad_norm": 5.198253631591797, - "learning_rate": 4.2317756678866315e-05, - "loss": 0.7684, - "step": 52140 - }, - { - "epoch": 0.4610230025283332, - "grad_norm": 2.5067381858825684, - "learning_rate": 4.231628329119445e-05, - "loss": 0.7395, - "step": 52150 - }, - { - "epoch": 0.4611114057886455, - "grad_norm": 5.629721164703369, - "learning_rate": 4.231480990352258e-05, - "loss": 0.7291, - "step": 52160 - }, - { - "epoch": 0.46119980904895774, - "grad_norm": 4.729606628417969, - "learning_rate": 4.231333651585071e-05, - "loss": 0.8573, - "step": 52170 - }, - { - "epoch": 0.46128821230927, - "grad_norm": 7.049135208129883, - "learning_rate": 4.2311863128178836e-05, - "loss": 0.6238, - "step": 52180 - }, - { - "epoch": 0.4613766155695822, - "grad_norm": 5.8184919357299805, - "learning_rate": 4.2310389740506964e-05, - "loss": 0.7103, - "step": 52190 - }, - { - "epoch": 0.46146501882989444, - "grad_norm": 5.012911796569824, - "learning_rate": 4.230891635283509e-05, - "loss": 0.7557, - "step": 52200 - }, - { - "epoch": 0.46155342209020667, - "grad_norm": 4.654698371887207, - "learning_rate": 4.230744296516323e-05, - "loss": 0.6179, - "step": 52210 - }, - { - "epoch": 0.46164182535051895, - "grad_norm": 4.618826866149902, - "learning_rate": 4.2305969577491356e-05, - "loss": 0.6809, - "step": 52220 - }, - { - "epoch": 0.4617302286108312, - "grad_norm": 3.6892995834350586, - "learning_rate": 4.2304496189819484e-05, - "loss": 0.8625, - "step": 52230 - }, - { - "epoch": 0.4618186318711434, - "grad_norm": 2.1181583404541016, - "learning_rate": 4.230302280214761e-05, - "loss": 0.5665, - "step": 52240 - }, - { - "epoch": 0.46190703513145565, - "grad_norm": 2.672295331954956, - "learning_rate": 4.230154941447574e-05, - "loss": 0.6711, - "step": 52250 - }, - { - "epoch": 0.4619954383917679, - "grad_norm": 7.382059097290039, - "learning_rate": 4.230007602680387e-05, - "loss": 0.5662, - "step": 52260 - }, - { - "epoch": 0.4620838416520801, - "grad_norm": 9.90565013885498, - "learning_rate": 4.2298602639132004e-05, - "loss": 0.777, - "step": 52270 - }, - { - "epoch": 0.4621722449123924, - "grad_norm": 3.0468902587890625, - "learning_rate": 4.2297129251460126e-05, - "loss": 0.7485, - "step": 52280 - }, - { - "epoch": 0.4622606481727046, - "grad_norm": 3.749941349029541, - "learning_rate": 4.229565586378826e-05, - "loss": 0.6977, - "step": 52290 - }, - { - "epoch": 0.46234905143301686, - "grad_norm": 4.086340427398682, - "learning_rate": 4.229418247611639e-05, - "loss": 0.6526, - "step": 52300 - }, - { - "epoch": 0.4624374546933291, - "grad_norm": 10.430846214294434, - "learning_rate": 4.229270908844452e-05, - "loss": 0.6436, - "step": 52310 - }, - { - "epoch": 0.4625258579536413, - "grad_norm": 4.599187850952148, - "learning_rate": 4.2291235700772646e-05, - "loss": 0.6184, - "step": 52320 - }, - { - "epoch": 0.46261426121395355, - "grad_norm": 2.2521350383758545, - "learning_rate": 4.228976231310078e-05, - "loss": 0.6449, - "step": 52330 - }, - { - "epoch": 0.46270266447426583, - "grad_norm": 1.5472252368927002, - "learning_rate": 4.22882889254289e-05, - "loss": 0.7082, - "step": 52340 - }, - { - "epoch": 0.46279106773457807, - "grad_norm": 3.4148683547973633, - "learning_rate": 4.228681553775704e-05, - "loss": 0.5674, - "step": 52350 - }, - { - "epoch": 0.4628794709948903, - "grad_norm": 4.581430435180664, - "learning_rate": 4.2285342150085166e-05, - "loss": 0.8915, - "step": 52360 - }, - { - "epoch": 0.4629678742552025, - "grad_norm": 5.565468788146973, - "learning_rate": 4.2283868762413295e-05, - "loss": 0.7555, - "step": 52370 - }, - { - "epoch": 0.46305627751551476, - "grad_norm": 8.734930992126465, - "learning_rate": 4.228239537474142e-05, - "loss": 0.6926, - "step": 52380 - }, - { - "epoch": 0.463144680775827, - "grad_norm": 2.026643753051758, - "learning_rate": 4.228092198706955e-05, - "loss": 0.6251, - "step": 52390 - }, - { - "epoch": 0.4632330840361393, - "grad_norm": 1.6456331014633179, - "learning_rate": 4.227944859939768e-05, - "loss": 0.7649, - "step": 52400 - }, - { - "epoch": 0.4633214872964515, - "grad_norm": 1.775565266609192, - "learning_rate": 4.2277975211725815e-05, - "loss": 0.6088, - "step": 52410 - }, - { - "epoch": 0.46340989055676374, - "grad_norm": 4.751766681671143, - "learning_rate": 4.2276501824053936e-05, - "loss": 0.693, - "step": 52420 - }, - { - "epoch": 0.46349829381707597, - "grad_norm": 5.910129070281982, - "learning_rate": 4.227502843638207e-05, - "loss": 0.7025, - "step": 52430 - }, - { - "epoch": 0.4635866970773882, - "grad_norm": 3.6207194328308105, - "learning_rate": 4.22735550487102e-05, - "loss": 0.7307, - "step": 52440 - }, - { - "epoch": 0.46367510033770043, - "grad_norm": 6.6016693115234375, - "learning_rate": 4.227208166103833e-05, - "loss": 0.8024, - "step": 52450 - }, - { - "epoch": 0.4637635035980127, - "grad_norm": 2.4947381019592285, - "learning_rate": 4.2270608273366457e-05, - "loss": 0.6709, - "step": 52460 - }, - { - "epoch": 0.46385190685832495, - "grad_norm": 2.1537387371063232, - "learning_rate": 4.226913488569459e-05, - "loss": 0.6732, - "step": 52470 - }, - { - "epoch": 0.4639403101186372, - "grad_norm": 2.8036246299743652, - "learning_rate": 4.226766149802271e-05, - "loss": 0.7085, - "step": 52480 - }, - { - "epoch": 0.4640287133789494, - "grad_norm": 2.016580820083618, - "learning_rate": 4.226618811035085e-05, - "loss": 0.7783, - "step": 52490 - }, - { - "epoch": 0.46411711663926164, - "grad_norm": 3.0787274837493896, - "learning_rate": 4.226471472267897e-05, - "loss": 0.5591, - "step": 52500 - }, - { - "epoch": 0.46420551989957387, - "grad_norm": 7.491862773895264, - "learning_rate": 4.2263241335007105e-05, - "loss": 0.7915, - "step": 52510 - }, - { - "epoch": 0.46429392315988616, - "grad_norm": 5.583034038543701, - "learning_rate": 4.2261767947335233e-05, - "loss": 0.6445, - "step": 52520 - }, - { - "epoch": 0.4643823264201984, - "grad_norm": 3.946359157562256, - "learning_rate": 4.226029455966336e-05, - "loss": 0.7772, - "step": 52530 - }, - { - "epoch": 0.4644707296805106, - "grad_norm": 2.98714280128479, - "learning_rate": 4.225882117199149e-05, - "loss": 0.7471, - "step": 52540 - }, - { - "epoch": 0.46455913294082285, - "grad_norm": 3.2688980102539062, - "learning_rate": 4.2257347784319625e-05, - "loss": 0.6658, - "step": 52550 - }, - { - "epoch": 0.4646475362011351, - "grad_norm": 3.5141897201538086, - "learning_rate": 4.225587439664775e-05, - "loss": 0.6256, - "step": 52560 - }, - { - "epoch": 0.4647359394614473, - "grad_norm": 5.148055076599121, - "learning_rate": 4.225440100897588e-05, - "loss": 0.7596, - "step": 52570 - }, - { - "epoch": 0.4648243427217596, - "grad_norm": 9.163445472717285, - "learning_rate": 4.225292762130401e-05, - "loss": 0.774, - "step": 52580 - }, - { - "epoch": 0.46491274598207183, - "grad_norm": 2.8391289710998535, - "learning_rate": 4.225145423363214e-05, - "loss": 0.7007, - "step": 52590 - }, - { - "epoch": 0.46500114924238406, - "grad_norm": 6.29801607131958, - "learning_rate": 4.224998084596027e-05, - "loss": 0.8114, - "step": 52600 - }, - { - "epoch": 0.4650895525026963, - "grad_norm": 4.505367279052734, - "learning_rate": 4.2248507458288395e-05, - "loss": 0.7236, - "step": 52610 - }, - { - "epoch": 0.4651779557630085, - "grad_norm": 11.121551513671875, - "learning_rate": 4.2247034070616524e-05, - "loss": 0.7362, - "step": 52620 - }, - { - "epoch": 0.46526635902332075, - "grad_norm": 8.888157844543457, - "learning_rate": 4.224556068294466e-05, - "loss": 0.7404, - "step": 52630 - }, - { - "epoch": 0.46535476228363304, - "grad_norm": 18.693090438842773, - "learning_rate": 4.224408729527278e-05, - "loss": 0.5955, - "step": 52640 - }, - { - "epoch": 0.46544316554394527, - "grad_norm": 5.19010066986084, - "learning_rate": 4.2242613907600916e-05, - "loss": 0.762, - "step": 52650 - }, - { - "epoch": 0.4655315688042575, - "grad_norm": 5.069072246551514, - "learning_rate": 4.2241140519929044e-05, - "loss": 0.6206, - "step": 52660 - }, - { - "epoch": 0.46561997206456973, - "grad_norm": 9.630207061767578, - "learning_rate": 4.223966713225717e-05, - "loss": 0.796, - "step": 52670 - }, - { - "epoch": 0.46570837532488196, - "grad_norm": 3.386852741241455, - "learning_rate": 4.22381937445853e-05, - "loss": 0.6616, - "step": 52680 - }, - { - "epoch": 0.46579677858519425, - "grad_norm": 0.8200322389602661, - "learning_rate": 4.2236720356913436e-05, - "loss": 0.6339, - "step": 52690 - }, - { - "epoch": 0.4658851818455065, - "grad_norm": 1.5718613862991333, - "learning_rate": 4.223524696924156e-05, - "loss": 0.7416, - "step": 52700 - }, - { - "epoch": 0.4659735851058187, - "grad_norm": 3.913846731185913, - "learning_rate": 4.223377358156969e-05, - "loss": 0.8131, - "step": 52710 - }, - { - "epoch": 0.46606198836613094, - "grad_norm": 10.412713050842285, - "learning_rate": 4.2232300193897814e-05, - "loss": 0.5501, - "step": 52720 - }, - { - "epoch": 0.4661503916264432, - "grad_norm": 1.6883413791656494, - "learning_rate": 4.223082680622595e-05, - "loss": 0.7556, - "step": 52730 - }, - { - "epoch": 0.4662387948867554, - "grad_norm": 1.4152474403381348, - "learning_rate": 4.222935341855408e-05, - "loss": 0.7298, - "step": 52740 - }, - { - "epoch": 0.4663271981470677, - "grad_norm": 5.408758640289307, - "learning_rate": 4.2227880030882206e-05, - "loss": 0.5726, - "step": 52750 - }, - { - "epoch": 0.4664156014073799, - "grad_norm": 1.2836005687713623, - "learning_rate": 4.2226406643210334e-05, - "loss": 0.6989, - "step": 52760 - }, - { - "epoch": 0.46650400466769215, - "grad_norm": 11.31423282623291, - "learning_rate": 4.222493325553847e-05, - "loss": 0.6883, - "step": 52770 - }, - { - "epoch": 0.4665924079280044, - "grad_norm": 14.369865417480469, - "learning_rate": 4.222345986786659e-05, - "loss": 0.6813, - "step": 52780 - }, - { - "epoch": 0.4666808111883166, - "grad_norm": 6.9002790451049805, - "learning_rate": 4.2221986480194726e-05, - "loss": 0.73, - "step": 52790 - }, - { - "epoch": 0.46676921444862884, - "grad_norm": 2.44985294342041, - "learning_rate": 4.2220513092522854e-05, - "loss": 0.698, - "step": 52800 - }, - { - "epoch": 0.46685761770894113, - "grad_norm": 6.439610004425049, - "learning_rate": 4.221903970485098e-05, - "loss": 0.6888, - "step": 52810 - }, - { - "epoch": 0.46694602096925336, - "grad_norm": 7.309405326843262, - "learning_rate": 4.221756631717911e-05, - "loss": 0.7419, - "step": 52820 - }, - { - "epoch": 0.4670344242295656, - "grad_norm": 4.0311455726623535, - "learning_rate": 4.2216092929507246e-05, - "loss": 0.6956, - "step": 52830 - }, - { - "epoch": 0.4671228274898778, - "grad_norm": 3.848418951034546, - "learning_rate": 4.221461954183537e-05, - "loss": 0.5721, - "step": 52840 - }, - { - "epoch": 0.46721123075019005, - "grad_norm": 2.4307098388671875, - "learning_rate": 4.22131461541635e-05, - "loss": 0.7171, - "step": 52850 - }, - { - "epoch": 0.4672996340105023, - "grad_norm": 2.257612466812134, - "learning_rate": 4.2211672766491625e-05, - "loss": 0.7377, - "step": 52860 - }, - { - "epoch": 0.46738803727081457, - "grad_norm": 8.017738342285156, - "learning_rate": 4.221019937881976e-05, - "loss": 0.7199, - "step": 52870 - }, - { - "epoch": 0.4674764405311268, - "grad_norm": 4.491119861602783, - "learning_rate": 4.220872599114789e-05, - "loss": 0.677, - "step": 52880 - }, - { - "epoch": 0.46756484379143903, - "grad_norm": 7.327272891998291, - "learning_rate": 4.2207252603476016e-05, - "loss": 0.7773, - "step": 52890 - }, - { - "epoch": 0.46765324705175126, - "grad_norm": 6.760767936706543, - "learning_rate": 4.2205779215804145e-05, - "loss": 0.7271, - "step": 52900 - }, - { - "epoch": 0.4677416503120635, - "grad_norm": 5.4441704750061035, - "learning_rate": 4.220430582813228e-05, - "loss": 0.7207, - "step": 52910 - }, - { - "epoch": 0.4678300535723757, - "grad_norm": 4.132839679718018, - "learning_rate": 4.22028324404604e-05, - "loss": 0.7278, - "step": 52920 - }, - { - "epoch": 0.467918456832688, - "grad_norm": 3.256896495819092, - "learning_rate": 4.220135905278854e-05, - "loss": 0.7062, - "step": 52930 - }, - { - "epoch": 0.46800686009300024, - "grad_norm": 3.098531484603882, - "learning_rate": 4.2199885665116665e-05, - "loss": 0.806, - "step": 52940 - }, - { - "epoch": 0.4680952633533125, - "grad_norm": 3.618494749069214, - "learning_rate": 4.219841227744479e-05, - "loss": 0.5924, - "step": 52950 - }, - { - "epoch": 0.4681836666136247, - "grad_norm": 3.786844253540039, - "learning_rate": 4.219693888977292e-05, - "loss": 0.7546, - "step": 52960 - }, - { - "epoch": 0.46827206987393694, - "grad_norm": 4.665619373321533, - "learning_rate": 4.219546550210105e-05, - "loss": 0.8267, - "step": 52970 - }, - { - "epoch": 0.46836047313424917, - "grad_norm": 12.414656639099121, - "learning_rate": 4.219399211442918e-05, - "loss": 0.7019, - "step": 52980 - }, - { - "epoch": 0.46844887639456145, - "grad_norm": 1.855115532875061, - "learning_rate": 4.2192518726757314e-05, - "loss": 0.6623, - "step": 52990 - }, - { - "epoch": 0.4685372796548737, - "grad_norm": 2.5500354766845703, - "learning_rate": 4.219104533908544e-05, - "loss": 0.808, - "step": 53000 - }, - { - "epoch": 0.4686256829151859, - "grad_norm": 3.0885989665985107, - "learning_rate": 4.218957195141357e-05, - "loss": 0.5627, - "step": 53010 - }, - { - "epoch": 0.46871408617549815, - "grad_norm": 4.889555931091309, - "learning_rate": 4.21880985637417e-05, - "loss": 0.6399, - "step": 53020 - }, - { - "epoch": 0.4688024894358104, - "grad_norm": 2.5776076316833496, - "learning_rate": 4.218662517606983e-05, - "loss": 0.5943, - "step": 53030 - }, - { - "epoch": 0.4688908926961226, - "grad_norm": 3.828183174133301, - "learning_rate": 4.2185151788397955e-05, - "loss": 0.7725, - "step": 53040 - }, - { - "epoch": 0.4689792959564349, - "grad_norm": 4.145967960357666, - "learning_rate": 4.218367840072609e-05, - "loss": 0.6897, - "step": 53050 - }, - { - "epoch": 0.4690676992167471, - "grad_norm": 5.40083122253418, - "learning_rate": 4.218220501305422e-05, - "loss": 0.7789, - "step": 53060 - }, - { - "epoch": 0.46915610247705936, - "grad_norm": 8.548382759094238, - "learning_rate": 4.218073162538235e-05, - "loss": 0.6406, - "step": 53070 - }, - { - "epoch": 0.4692445057373716, - "grad_norm": 1.5795224905014038, - "learning_rate": 4.2179258237710476e-05, - "loss": 0.7382, - "step": 53080 - }, - { - "epoch": 0.4693329089976838, - "grad_norm": 4.020104885101318, - "learning_rate": 4.2177784850038604e-05, - "loss": 0.532, - "step": 53090 - }, - { - "epoch": 0.46942131225799605, - "grad_norm": 2.954549551010132, - "learning_rate": 4.217631146236673e-05, - "loss": 0.7156, - "step": 53100 - }, - { - "epoch": 0.46950971551830833, - "grad_norm": 7.192239761352539, - "learning_rate": 4.217483807469486e-05, - "loss": 0.7216, - "step": 53110 - }, - { - "epoch": 0.46959811877862057, - "grad_norm": 5.926204681396484, - "learning_rate": 4.2173364687022996e-05, - "loss": 0.6224, - "step": 53120 - }, - { - "epoch": 0.4696865220389328, - "grad_norm": 5.667525768280029, - "learning_rate": 4.2171891299351124e-05, - "loss": 0.6615, - "step": 53130 - }, - { - "epoch": 0.469774925299245, - "grad_norm": 4.380832672119141, - "learning_rate": 4.217041791167925e-05, - "loss": 0.6975, - "step": 53140 - }, - { - "epoch": 0.46986332855955726, - "grad_norm": 2.1343722343444824, - "learning_rate": 4.216894452400738e-05, - "loss": 0.7124, - "step": 53150 - }, - { - "epoch": 0.4699517318198695, - "grad_norm": 2.3305907249450684, - "learning_rate": 4.216747113633551e-05, - "loss": 0.8327, - "step": 53160 - }, - { - "epoch": 0.4700401350801818, - "grad_norm": 2.136439561843872, - "learning_rate": 4.216599774866364e-05, - "loss": 0.7655, - "step": 53170 - }, - { - "epoch": 0.470128538340494, - "grad_norm": 12.449284553527832, - "learning_rate": 4.216452436099177e-05, - "loss": 0.7124, - "step": 53180 - }, - { - "epoch": 0.47021694160080624, - "grad_norm": 3.001804828643799, - "learning_rate": 4.2163050973319894e-05, - "loss": 0.6815, - "step": 53190 - }, - { - "epoch": 0.47030534486111847, - "grad_norm": 2.120994806289673, - "learning_rate": 4.216157758564803e-05, - "loss": 0.7672, - "step": 53200 - }, - { - "epoch": 0.4703937481214307, - "grad_norm": 3.306929349899292, - "learning_rate": 4.216010419797616e-05, - "loss": 0.7134, - "step": 53210 - }, - { - "epoch": 0.470482151381743, - "grad_norm": 5.747926235198975, - "learning_rate": 4.2158630810304286e-05, - "loss": 0.7064, - "step": 53220 - }, - { - "epoch": 0.4705705546420552, - "grad_norm": 2.683472156524658, - "learning_rate": 4.2157157422632414e-05, - "loss": 0.756, - "step": 53230 - }, - { - "epoch": 0.47065895790236745, - "grad_norm": 2.397841215133667, - "learning_rate": 4.215568403496055e-05, - "loss": 0.7754, - "step": 53240 - }, - { - "epoch": 0.4707473611626797, - "grad_norm": 1.5261938571929932, - "learning_rate": 4.215421064728867e-05, - "loss": 0.6572, - "step": 53250 - }, - { - "epoch": 0.4708357644229919, - "grad_norm": 7.471903324127197, - "learning_rate": 4.2152737259616806e-05, - "loss": 0.8332, - "step": 53260 - }, - { - "epoch": 0.47092416768330414, - "grad_norm": 5.161137104034424, - "learning_rate": 4.2151263871944935e-05, - "loss": 0.6651, - "step": 53270 - }, - { - "epoch": 0.4710125709436164, - "grad_norm": 2.039276599884033, - "learning_rate": 4.214979048427306e-05, - "loss": 0.6569, - "step": 53280 - }, - { - "epoch": 0.47110097420392866, - "grad_norm": 3.896923303604126, - "learning_rate": 4.214831709660119e-05, - "loss": 0.7154, - "step": 53290 - }, - { - "epoch": 0.4711893774642409, - "grad_norm": 6.703306198120117, - "learning_rate": 4.2146843708929326e-05, - "loss": 0.6368, - "step": 53300 - }, - { - "epoch": 0.4712777807245531, - "grad_norm": 1.7361499071121216, - "learning_rate": 4.214537032125745e-05, - "loss": 0.7386, - "step": 53310 - }, - { - "epoch": 0.47136618398486535, - "grad_norm": 4.387570381164551, - "learning_rate": 4.214389693358558e-05, - "loss": 0.7631, - "step": 53320 - }, - { - "epoch": 0.4714545872451776, - "grad_norm": 4.573794364929199, - "learning_rate": 4.2142423545913705e-05, - "loss": 0.7806, - "step": 53330 - }, - { - "epoch": 0.47154299050548987, - "grad_norm": 5.908381462097168, - "learning_rate": 4.214095015824184e-05, - "loss": 0.6827, - "step": 53340 - }, - { - "epoch": 0.4716313937658021, - "grad_norm": 4.799923419952393, - "learning_rate": 4.213947677056997e-05, - "loss": 0.6317, - "step": 53350 - }, - { - "epoch": 0.47171979702611433, - "grad_norm": 4.03240442276001, - "learning_rate": 4.2138003382898097e-05, - "loss": 0.8315, - "step": 53360 - }, - { - "epoch": 0.47180820028642656, - "grad_norm": 1.7955182790756226, - "learning_rate": 4.2136529995226225e-05, - "loss": 0.6812, - "step": 53370 - }, - { - "epoch": 0.4718966035467388, - "grad_norm": 3.1082003116607666, - "learning_rate": 4.213505660755436e-05, - "loss": 0.7699, - "step": 53380 - }, - { - "epoch": 0.471985006807051, - "grad_norm": 7.408374786376953, - "learning_rate": 4.213358321988248e-05, - "loss": 0.709, - "step": 53390 - }, - { - "epoch": 0.4720734100673633, - "grad_norm": 1.9412243366241455, - "learning_rate": 4.213210983221062e-05, - "loss": 0.7306, - "step": 53400 - }, - { - "epoch": 0.47216181332767554, - "grad_norm": 5.350911617279053, - "learning_rate": 4.2130636444538745e-05, - "loss": 0.7458, - "step": 53410 - }, - { - "epoch": 0.47225021658798777, - "grad_norm": 5.449341297149658, - "learning_rate": 4.2129163056866873e-05, - "loss": 0.6542, - "step": 53420 - }, - { - "epoch": 0.4723386198483, - "grad_norm": 8.928043365478516, - "learning_rate": 4.2127689669195e-05, - "loss": 0.72, - "step": 53430 - }, - { - "epoch": 0.47242702310861223, - "grad_norm": 8.926413536071777, - "learning_rate": 4.212621628152313e-05, - "loss": 0.6064, - "step": 53440 - }, - { - "epoch": 0.47251542636892446, - "grad_norm": 4.622246265411377, - "learning_rate": 4.212474289385126e-05, - "loss": 0.6907, - "step": 53450 - }, - { - "epoch": 0.47260382962923675, - "grad_norm": 3.0518133640289307, - "learning_rate": 4.2123269506179394e-05, - "loss": 0.7999, - "step": 53460 - }, - { - "epoch": 0.472692232889549, - "grad_norm": 2.4066314697265625, - "learning_rate": 4.2121796118507515e-05, - "loss": 0.6944, - "step": 53470 - }, - { - "epoch": 0.4727806361498612, - "grad_norm": 2.3513970375061035, - "learning_rate": 4.212032273083565e-05, - "loss": 0.7082, - "step": 53480 - }, - { - "epoch": 0.47286903941017344, - "grad_norm": 2.125847816467285, - "learning_rate": 4.211884934316378e-05, - "loss": 0.6513, - "step": 53490 - }, - { - "epoch": 0.4729574426704857, - "grad_norm": 2.1552071571350098, - "learning_rate": 4.211737595549191e-05, - "loss": 0.5535, - "step": 53500 - }, - { - "epoch": 0.4730458459307979, - "grad_norm": 6.239655017852783, - "learning_rate": 4.2115902567820035e-05, - "loss": 0.8503, - "step": 53510 - }, - { - "epoch": 0.4731342491911102, - "grad_norm": 7.939951419830322, - "learning_rate": 4.211442918014817e-05, - "loss": 0.6466, - "step": 53520 - }, - { - "epoch": 0.4732226524514224, - "grad_norm": 2.3682501316070557, - "learning_rate": 4.211295579247629e-05, - "loss": 0.777, - "step": 53530 - }, - { - "epoch": 0.47331105571173465, - "grad_norm": 7.721220970153809, - "learning_rate": 4.211148240480443e-05, - "loss": 0.718, - "step": 53540 - }, - { - "epoch": 0.4733994589720469, - "grad_norm": 2.304795742034912, - "learning_rate": 4.211000901713255e-05, - "loss": 0.7109, - "step": 53550 - }, - { - "epoch": 0.4734878622323591, - "grad_norm": 5.924191951751709, - "learning_rate": 4.2108535629460684e-05, - "loss": 0.6815, - "step": 53560 - }, - { - "epoch": 0.47357626549267134, - "grad_norm": 4.856740474700928, - "learning_rate": 4.210706224178881e-05, - "loss": 0.7755, - "step": 53570 - }, - { - "epoch": 0.47366466875298363, - "grad_norm": 3.9173684120178223, - "learning_rate": 4.210558885411694e-05, - "loss": 0.7756, - "step": 53580 - }, - { - "epoch": 0.47375307201329586, - "grad_norm": 5.201943397521973, - "learning_rate": 4.210411546644507e-05, - "loss": 0.679, - "step": 53590 - }, - { - "epoch": 0.4738414752736081, - "grad_norm": 3.4333572387695312, - "learning_rate": 4.2102642078773204e-05, - "loss": 0.7432, - "step": 53600 - }, - { - "epoch": 0.4739298785339203, - "grad_norm": 9.132095336914062, - "learning_rate": 4.2101168691101326e-05, - "loss": 0.8044, - "step": 53610 - }, - { - "epoch": 0.47401828179423255, - "grad_norm": 4.284627437591553, - "learning_rate": 4.209969530342946e-05, - "loss": 0.6837, - "step": 53620 - }, - { - "epoch": 0.4741066850545448, - "grad_norm": 2.861417531967163, - "learning_rate": 4.209822191575759e-05, - "loss": 0.7408, - "step": 53630 - }, - { - "epoch": 0.47419508831485707, - "grad_norm": 5.994079113006592, - "learning_rate": 4.209674852808572e-05, - "loss": 0.7608, - "step": 53640 - }, - { - "epoch": 0.4742834915751693, - "grad_norm": 5.555888652801514, - "learning_rate": 4.2095275140413846e-05, - "loss": 0.6456, - "step": 53650 - }, - { - "epoch": 0.47437189483548153, - "grad_norm": 2.0312082767486572, - "learning_rate": 4.209380175274198e-05, - "loss": 0.6923, - "step": 53660 - }, - { - "epoch": 0.47446029809579376, - "grad_norm": 3.7952773571014404, - "learning_rate": 4.20923283650701e-05, - "loss": 0.7644, - "step": 53670 - }, - { - "epoch": 0.474548701356106, - "grad_norm": 4.994013786315918, - "learning_rate": 4.209085497739824e-05, - "loss": 0.7715, - "step": 53680 - }, - { - "epoch": 0.4746371046164182, - "grad_norm": 8.997997283935547, - "learning_rate": 4.208938158972636e-05, - "loss": 0.7227, - "step": 53690 - }, - { - "epoch": 0.4747255078767305, - "grad_norm": 2.1706600189208984, - "learning_rate": 4.2087908202054494e-05, - "loss": 0.6464, - "step": 53700 - }, - { - "epoch": 0.47481391113704274, - "grad_norm": 3.9491937160491943, - "learning_rate": 4.208643481438262e-05, - "loss": 0.7095, - "step": 53710 - }, - { - "epoch": 0.474902314397355, - "grad_norm": 3.4568047523498535, - "learning_rate": 4.208496142671075e-05, - "loss": 0.7505, - "step": 53720 - }, - { - "epoch": 0.4749907176576672, - "grad_norm": 6.685573577880859, - "learning_rate": 4.208348803903888e-05, - "loss": 0.7671, - "step": 53730 - }, - { - "epoch": 0.47507912091797944, - "grad_norm": 5.928294658660889, - "learning_rate": 4.2082014651367015e-05, - "loss": 0.7748, - "step": 53740 - }, - { - "epoch": 0.4751675241782917, - "grad_norm": 2.370215654373169, - "learning_rate": 4.2080541263695136e-05, - "loss": 0.6908, - "step": 53750 - }, - { - "epoch": 0.47525592743860395, - "grad_norm": 4.826587200164795, - "learning_rate": 4.207906787602327e-05, - "loss": 0.6833, - "step": 53760 - }, - { - "epoch": 0.4753443306989162, - "grad_norm": 8.44394302368164, - "learning_rate": 4.20775944883514e-05, - "loss": 0.8003, - "step": 53770 - }, - { - "epoch": 0.4754327339592284, - "grad_norm": 2.7617194652557373, - "learning_rate": 4.207612110067953e-05, - "loss": 0.598, - "step": 53780 - }, - { - "epoch": 0.47552113721954065, - "grad_norm": 1.5828273296356201, - "learning_rate": 4.2074647713007656e-05, - "loss": 0.7267, - "step": 53790 - }, - { - "epoch": 0.4756095404798529, - "grad_norm": 7.319702625274658, - "learning_rate": 4.2073174325335785e-05, - "loss": 0.6068, - "step": 53800 - }, - { - "epoch": 0.47569794374016516, - "grad_norm": 2.0254814624786377, - "learning_rate": 4.207170093766391e-05, - "loss": 0.7966, - "step": 53810 - }, - { - "epoch": 0.4757863470004774, - "grad_norm": 1.9124932289123535, - "learning_rate": 4.207022754999205e-05, - "loss": 0.8703, - "step": 53820 - }, - { - "epoch": 0.4758747502607896, - "grad_norm": 4.167019367218018, - "learning_rate": 4.206875416232017e-05, - "loss": 0.7182, - "step": 53830 - }, - { - "epoch": 0.47596315352110186, - "grad_norm": 0.9499847292900085, - "learning_rate": 4.2067280774648305e-05, - "loss": 0.6645, - "step": 53840 - }, - { - "epoch": 0.4760515567814141, - "grad_norm": 5.8929314613342285, - "learning_rate": 4.206580738697643e-05, - "loss": 0.6525, - "step": 53850 - }, - { - "epoch": 0.4761399600417263, - "grad_norm": 10.652838706970215, - "learning_rate": 4.206433399930456e-05, - "loss": 0.814, - "step": 53860 - }, - { - "epoch": 0.4762283633020386, - "grad_norm": 2.5791962146759033, - "learning_rate": 4.206286061163269e-05, - "loss": 0.6601, - "step": 53870 - }, - { - "epoch": 0.47631676656235084, - "grad_norm": 3.0517194271087646, - "learning_rate": 4.2061387223960825e-05, - "loss": 0.7004, - "step": 53880 - }, - { - "epoch": 0.47640516982266307, - "grad_norm": 2.1429057121276855, - "learning_rate": 4.205991383628895e-05, - "loss": 0.5849, - "step": 53890 - }, - { - "epoch": 0.4764935730829753, - "grad_norm": 1.8528274297714233, - "learning_rate": 4.205844044861708e-05, - "loss": 0.6686, - "step": 53900 - }, - { - "epoch": 0.47658197634328753, - "grad_norm": 7.8360276222229, - "learning_rate": 4.205696706094521e-05, - "loss": 0.6581, - "step": 53910 - }, - { - "epoch": 0.47667037960359976, - "grad_norm": 2.7732481956481934, - "learning_rate": 4.205549367327334e-05, - "loss": 0.6492, - "step": 53920 - }, - { - "epoch": 0.47675878286391205, - "grad_norm": 4.227574825286865, - "learning_rate": 4.205402028560147e-05, - "loss": 0.7264, - "step": 53930 - }, - { - "epoch": 0.4768471861242243, - "grad_norm": 3.157350778579712, - "learning_rate": 4.2052546897929595e-05, - "loss": 0.7311, - "step": 53940 - }, - { - "epoch": 0.4769355893845365, - "grad_norm": 4.121584892272949, - "learning_rate": 4.2051073510257724e-05, - "loss": 0.7377, - "step": 53950 - }, - { - "epoch": 0.47702399264484874, - "grad_norm": 6.046733856201172, - "learning_rate": 4.204960012258586e-05, - "loss": 0.577, - "step": 53960 - }, - { - "epoch": 0.47711239590516097, - "grad_norm": 2.53169322013855, - "learning_rate": 4.204812673491399e-05, - "loss": 0.62, - "step": 53970 - }, - { - "epoch": 0.4772007991654732, - "grad_norm": 13.655868530273438, - "learning_rate": 4.2046653347242115e-05, - "loss": 0.6317, - "step": 53980 - }, - { - "epoch": 0.4772892024257855, - "grad_norm": 1.9250119924545288, - "learning_rate": 4.2045179959570244e-05, - "loss": 0.6965, - "step": 53990 - }, - { - "epoch": 0.4773776056860977, - "grad_norm": 16.838058471679688, - "learning_rate": 4.204370657189837e-05, - "loss": 0.7622, - "step": 54000 - }, - { - "epoch": 0.47746600894640995, - "grad_norm": 2.1734912395477295, - "learning_rate": 4.20422331842265e-05, - "loss": 0.8387, - "step": 54010 - }, - { - "epoch": 0.4775544122067222, - "grad_norm": 8.551667213439941, - "learning_rate": 4.204075979655463e-05, - "loss": 0.6874, - "step": 54020 - }, - { - "epoch": 0.4776428154670344, - "grad_norm": 2.9934494495391846, - "learning_rate": 4.2039286408882764e-05, - "loss": 0.6875, - "step": 54030 - }, - { - "epoch": 0.47773121872734664, - "grad_norm": 4.535610198974609, - "learning_rate": 4.203781302121089e-05, - "loss": 0.6902, - "step": 54040 - }, - { - "epoch": 0.4778196219876589, - "grad_norm": 4.351871490478516, - "learning_rate": 4.203633963353902e-05, - "loss": 0.7331, - "step": 54050 - }, - { - "epoch": 0.47790802524797116, - "grad_norm": 2.035493850708008, - "learning_rate": 4.203486624586715e-05, - "loss": 0.6733, - "step": 54060 - }, - { - "epoch": 0.4779964285082834, - "grad_norm": 4.51971435546875, - "learning_rate": 4.203339285819528e-05, - "loss": 0.6584, - "step": 54070 - }, - { - "epoch": 0.4780848317685956, - "grad_norm": 14.118454933166504, - "learning_rate": 4.2031919470523406e-05, - "loss": 0.8002, - "step": 54080 - }, - { - "epoch": 0.47817323502890785, - "grad_norm": 5.4625091552734375, - "learning_rate": 4.203044608285154e-05, - "loss": 0.7582, - "step": 54090 - }, - { - "epoch": 0.4782616382892201, - "grad_norm": 2.040560245513916, - "learning_rate": 4.202897269517967e-05, - "loss": 0.6083, - "step": 54100 - }, - { - "epoch": 0.47835004154953237, - "grad_norm": 3.6075687408447266, - "learning_rate": 4.20274993075078e-05, - "loss": 0.6612, - "step": 54110 - }, - { - "epoch": 0.4784384448098446, - "grad_norm": 10.534326553344727, - "learning_rate": 4.2026025919835926e-05, - "loss": 0.7524, - "step": 54120 - }, - { - "epoch": 0.47852684807015683, - "grad_norm": 1.964032530784607, - "learning_rate": 4.2024552532164054e-05, - "loss": 0.7115, - "step": 54130 - }, - { - "epoch": 0.47861525133046906, - "grad_norm": 9.679758071899414, - "learning_rate": 4.202307914449218e-05, - "loss": 0.7912, - "step": 54140 - }, - { - "epoch": 0.4787036545907813, - "grad_norm": 3.8983168601989746, - "learning_rate": 4.202160575682032e-05, - "loss": 0.8151, - "step": 54150 - }, - { - "epoch": 0.4787920578510935, - "grad_norm": 1.4353677034378052, - "learning_rate": 4.202013236914844e-05, - "loss": 0.679, - "step": 54160 - }, - { - "epoch": 0.4788804611114058, - "grad_norm": 13.79578971862793, - "learning_rate": 4.2018658981476575e-05, - "loss": 0.6974, - "step": 54170 - }, - { - "epoch": 0.47896886437171804, - "grad_norm": 3.1516776084899902, - "learning_rate": 4.20171855938047e-05, - "loss": 0.6885, - "step": 54180 - }, - { - "epoch": 0.47905726763203027, - "grad_norm": 1.3675308227539062, - "learning_rate": 4.201571220613283e-05, - "loss": 0.7082, - "step": 54190 - }, - { - "epoch": 0.4791456708923425, - "grad_norm": 3.3600339889526367, - "learning_rate": 4.201423881846096e-05, - "loss": 0.8791, - "step": 54200 - }, - { - "epoch": 0.47923407415265473, - "grad_norm": 5.11338472366333, - "learning_rate": 4.2012765430789095e-05, - "loss": 0.8247, - "step": 54210 - }, - { - "epoch": 0.47932247741296696, - "grad_norm": 1.0892534255981445, - "learning_rate": 4.2011292043117216e-05, - "loss": 0.722, - "step": 54220 - }, - { - "epoch": 0.47941088067327925, - "grad_norm": 1.8006415367126465, - "learning_rate": 4.200981865544535e-05, - "loss": 0.6324, - "step": 54230 - }, - { - "epoch": 0.4794992839335915, - "grad_norm": 13.464760780334473, - "learning_rate": 4.200834526777348e-05, - "loss": 0.7503, - "step": 54240 - }, - { - "epoch": 0.4795876871939037, - "grad_norm": 3.999490976333618, - "learning_rate": 4.200687188010161e-05, - "loss": 0.5964, - "step": 54250 - }, - { - "epoch": 0.47967609045421594, - "grad_norm": 1.042232871055603, - "learning_rate": 4.2005398492429736e-05, - "loss": 0.5991, - "step": 54260 - }, - { - "epoch": 0.4797644937145282, - "grad_norm": 12.239282608032227, - "learning_rate": 4.2003925104757865e-05, - "loss": 0.7737, - "step": 54270 - }, - { - "epoch": 0.47985289697484046, - "grad_norm": 5.707892417907715, - "learning_rate": 4.200245171708599e-05, - "loss": 0.5718, - "step": 54280 - }, - { - "epoch": 0.4799413002351527, - "grad_norm": 10.65614128112793, - "learning_rate": 4.200097832941413e-05, - "loss": 0.7817, - "step": 54290 - }, - { - "epoch": 0.4800297034954649, - "grad_norm": 7.1056671142578125, - "learning_rate": 4.199950494174225e-05, - "loss": 0.6784, - "step": 54300 - }, - { - "epoch": 0.48011810675577715, - "grad_norm": 3.8355488777160645, - "learning_rate": 4.1998031554070385e-05, - "loss": 0.7311, - "step": 54310 - }, - { - "epoch": 0.4802065100160894, - "grad_norm": 1.3522825241088867, - "learning_rate": 4.199655816639851e-05, - "loss": 0.7119, - "step": 54320 - }, - { - "epoch": 0.4802949132764016, - "grad_norm": 1.1591590642929077, - "learning_rate": 4.199508477872664e-05, - "loss": 0.594, - "step": 54330 - }, - { - "epoch": 0.4803833165367139, - "grad_norm": 5.093685150146484, - "learning_rate": 4.199361139105477e-05, - "loss": 0.6828, - "step": 54340 - }, - { - "epoch": 0.48047171979702613, - "grad_norm": 3.0881261825561523, - "learning_rate": 4.1992138003382905e-05, - "loss": 0.7552, - "step": 54350 - }, - { - "epoch": 0.48056012305733836, - "grad_norm": 2.330265998840332, - "learning_rate": 4.199066461571103e-05, - "loss": 0.8338, - "step": 54360 - }, - { - "epoch": 0.4806485263176506, - "grad_norm": 2.2567107677459717, - "learning_rate": 4.198919122803916e-05, - "loss": 0.6752, - "step": 54370 - }, - { - "epoch": 0.4807369295779628, - "grad_norm": 4.572037220001221, - "learning_rate": 4.1987717840367283e-05, - "loss": 0.6666, - "step": 54380 - }, - { - "epoch": 0.48082533283827505, - "grad_norm": 8.050597190856934, - "learning_rate": 4.198624445269542e-05, - "loss": 0.7116, - "step": 54390 - }, - { - "epoch": 0.48091373609858734, - "grad_norm": 1.7825093269348145, - "learning_rate": 4.198477106502355e-05, - "loss": 0.6326, - "step": 54400 - }, - { - "epoch": 0.48100213935889957, - "grad_norm": 2.3205370903015137, - "learning_rate": 4.1983297677351675e-05, - "loss": 0.6829, - "step": 54410 - }, - { - "epoch": 0.4810905426192118, - "grad_norm": 3.3724260330200195, - "learning_rate": 4.1981824289679804e-05, - "loss": 0.5336, - "step": 54420 - }, - { - "epoch": 0.48117894587952403, - "grad_norm": 2.3022725582122803, - "learning_rate": 4.198035090200794e-05, - "loss": 0.6742, - "step": 54430 - }, - { - "epoch": 0.48126734913983626, - "grad_norm": 3.31050443649292, - "learning_rate": 4.197887751433606e-05, - "loss": 0.7448, - "step": 54440 - }, - { - "epoch": 0.4813557524001485, - "grad_norm": 6.843266487121582, - "learning_rate": 4.1977404126664196e-05, - "loss": 0.7601, - "step": 54450 - }, - { - "epoch": 0.4814441556604608, - "grad_norm": 4.714592933654785, - "learning_rate": 4.1975930738992324e-05, - "loss": 0.6474, - "step": 54460 - }, - { - "epoch": 0.481532558920773, - "grad_norm": 2.2546401023864746, - "learning_rate": 4.197445735132045e-05, - "loss": 0.7151, - "step": 54470 - }, - { - "epoch": 0.48162096218108524, - "grad_norm": 3.8935787677764893, - "learning_rate": 4.197298396364858e-05, - "loss": 0.6055, - "step": 54480 - }, - { - "epoch": 0.4817093654413975, - "grad_norm": 4.3352437019348145, - "learning_rate": 4.197151057597671e-05, - "loss": 0.6747, - "step": 54490 - }, - { - "epoch": 0.4817977687017097, - "grad_norm": 10.236634254455566, - "learning_rate": 4.197003718830484e-05, - "loss": 0.7769, - "step": 54500 - }, - { - "epoch": 0.48188617196202194, - "grad_norm": 2.106126070022583, - "learning_rate": 4.196856380063297e-05, - "loss": 0.7155, - "step": 54510 - }, - { - "epoch": 0.4819745752223342, - "grad_norm": 3.186664581298828, - "learning_rate": 4.1967090412961094e-05, - "loss": 0.5455, - "step": 54520 - }, - { - "epoch": 0.48206297848264645, - "grad_norm": 2.3363170623779297, - "learning_rate": 4.196561702528923e-05, - "loss": 0.6786, - "step": 54530 - }, - { - "epoch": 0.4821513817429587, - "grad_norm": 5.277554035186768, - "learning_rate": 4.196414363761736e-05, - "loss": 0.5956, - "step": 54540 - }, - { - "epoch": 0.4822397850032709, - "grad_norm": 14.230496406555176, - "learning_rate": 4.1962670249945486e-05, - "loss": 0.7322, - "step": 54550 - }, - { - "epoch": 0.48232818826358315, - "grad_norm": 4.711739540100098, - "learning_rate": 4.1961196862273614e-05, - "loss": 0.8439, - "step": 54560 - }, - { - "epoch": 0.4824165915238954, - "grad_norm": 1.5514178276062012, - "learning_rate": 4.195972347460175e-05, - "loss": 0.6972, - "step": 54570 - }, - { - "epoch": 0.48250499478420766, - "grad_norm": 3.1461331844329834, - "learning_rate": 4.195825008692987e-05, - "loss": 0.7505, - "step": 54580 - }, - { - "epoch": 0.4825933980445199, - "grad_norm": 2.6853485107421875, - "learning_rate": 4.1956776699258006e-05, - "loss": 0.6901, - "step": 54590 - }, - { - "epoch": 0.4826818013048321, - "grad_norm": 9.775922775268555, - "learning_rate": 4.1955303311586134e-05, - "loss": 0.6844, - "step": 54600 - }, - { - "epoch": 0.48277020456514436, - "grad_norm": 3.3453292846679688, - "learning_rate": 4.195382992391426e-05, - "loss": 0.6983, - "step": 54610 - }, - { - "epoch": 0.4828586078254566, - "grad_norm": 5.067368984222412, - "learning_rate": 4.195235653624239e-05, - "loss": 0.8372, - "step": 54620 - }, - { - "epoch": 0.4829470110857688, - "grad_norm": 7.5894575119018555, - "learning_rate": 4.195088314857052e-05, - "loss": 0.8135, - "step": 54630 - }, - { - "epoch": 0.4830354143460811, - "grad_norm": 3.6968748569488525, - "learning_rate": 4.194940976089865e-05, - "loss": 0.7562, - "step": 54640 - }, - { - "epoch": 0.48312381760639334, - "grad_norm": 2.677279472351074, - "learning_rate": 4.194793637322678e-05, - "loss": 0.6042, - "step": 54650 - }, - { - "epoch": 0.48321222086670557, - "grad_norm": 8.589305877685547, - "learning_rate": 4.1946462985554905e-05, - "loss": 0.5726, - "step": 54660 - }, - { - "epoch": 0.4833006241270178, - "grad_norm": 5.660636901855469, - "learning_rate": 4.194498959788304e-05, - "loss": 0.7243, - "step": 54670 - }, - { - "epoch": 0.48338902738733003, - "grad_norm": 1.8863121271133423, - "learning_rate": 4.194351621021117e-05, - "loss": 0.7849, - "step": 54680 - }, - { - "epoch": 0.48347743064764226, - "grad_norm": 6.596573352813721, - "learning_rate": 4.1942042822539296e-05, - "loss": 0.6581, - "step": 54690 - }, - { - "epoch": 0.48356583390795455, - "grad_norm": 1.5892181396484375, - "learning_rate": 4.1940569434867425e-05, - "loss": 0.6964, - "step": 54700 - }, - { - "epoch": 0.4836542371682668, - "grad_norm": 1.8390005826950073, - "learning_rate": 4.193909604719556e-05, - "loss": 0.6897, - "step": 54710 - }, - { - "epoch": 0.483742640428579, - "grad_norm": 11.260882377624512, - "learning_rate": 4.193762265952368e-05, - "loss": 0.6603, - "step": 54720 - }, - { - "epoch": 0.48383104368889124, - "grad_norm": 2.672907829284668, - "learning_rate": 4.1936149271851817e-05, - "loss": 0.7485, - "step": 54730 - }, - { - "epoch": 0.48391944694920347, - "grad_norm": 7.139774322509766, - "learning_rate": 4.193467588417994e-05, - "loss": 0.801, - "step": 54740 - }, - { - "epoch": 0.4840078502095157, - "grad_norm": 2.941298007965088, - "learning_rate": 4.193320249650807e-05, - "loss": 0.8014, - "step": 54750 - }, - { - "epoch": 0.484096253469828, - "grad_norm": 4.749353885650635, - "learning_rate": 4.19317291088362e-05, - "loss": 0.6926, - "step": 54760 - }, - { - "epoch": 0.4841846567301402, - "grad_norm": 11.97219467163086, - "learning_rate": 4.193025572116433e-05, - "loss": 0.6381, - "step": 54770 - }, - { - "epoch": 0.48427305999045245, - "grad_norm": 5.572201728820801, - "learning_rate": 4.192878233349246e-05, - "loss": 0.8293, - "step": 54780 - }, - { - "epoch": 0.4843614632507647, - "grad_norm": 2.2339625358581543, - "learning_rate": 4.1927308945820593e-05, - "loss": 0.6532, - "step": 54790 - }, - { - "epoch": 0.4844498665110769, - "grad_norm": 5.473230838775635, - "learning_rate": 4.1925835558148715e-05, - "loss": 0.7743, - "step": 54800 - }, - { - "epoch": 0.4845382697713892, - "grad_norm": 3.816479206085205, - "learning_rate": 4.192436217047685e-05, - "loss": 0.7157, - "step": 54810 - }, - { - "epoch": 0.4846266730317014, - "grad_norm": 11.660649299621582, - "learning_rate": 4.192288878280498e-05, - "loss": 0.6943, - "step": 54820 - }, - { - "epoch": 0.48471507629201366, - "grad_norm": 3.910614490509033, - "learning_rate": 4.192141539513311e-05, - "loss": 0.7379, - "step": 54830 - }, - { - "epoch": 0.4848034795523259, - "grad_norm": 4.9388108253479, - "learning_rate": 4.1919942007461235e-05, - "loss": 0.7465, - "step": 54840 - }, - { - "epoch": 0.4848918828126381, - "grad_norm": 10.465206146240234, - "learning_rate": 4.1918468619789364e-05, - "loss": 0.8192, - "step": 54850 - }, - { - "epoch": 0.48498028607295035, - "grad_norm": 2.099726438522339, - "learning_rate": 4.191699523211749e-05, - "loss": 0.7743, - "step": 54860 - }, - { - "epoch": 0.48506868933326264, - "grad_norm": 2.6266822814941406, - "learning_rate": 4.191552184444563e-05, - "loss": 0.6546, - "step": 54870 - }, - { - "epoch": 0.48515709259357487, - "grad_norm": 4.461912631988525, - "learning_rate": 4.1914048456773755e-05, - "loss": 0.7234, - "step": 54880 - }, - { - "epoch": 0.4852454958538871, - "grad_norm": 10.542143821716309, - "learning_rate": 4.1912575069101884e-05, - "loss": 0.6395, - "step": 54890 - }, - { - "epoch": 0.48533389911419933, - "grad_norm": 2.121877431869507, - "learning_rate": 4.191110168143001e-05, - "loss": 0.7799, - "step": 54900 - }, - { - "epoch": 0.48542230237451156, - "grad_norm": 5.219315052032471, - "learning_rate": 4.190962829375814e-05, - "loss": 0.6919, - "step": 54910 - }, - { - "epoch": 0.4855107056348238, - "grad_norm": 1.6760114431381226, - "learning_rate": 4.190815490608627e-05, - "loss": 0.632, - "step": 54920 - }, - { - "epoch": 0.4855991088951361, - "grad_norm": 6.953105449676514, - "learning_rate": 4.1906681518414404e-05, - "loss": 0.6959, - "step": 54930 - }, - { - "epoch": 0.4856875121554483, - "grad_norm": 2.8986353874206543, - "learning_rate": 4.190520813074253e-05, - "loss": 0.6063, - "step": 54940 - }, - { - "epoch": 0.48577591541576054, - "grad_norm": 8.168563842773438, - "learning_rate": 4.190373474307066e-05, - "loss": 0.721, - "step": 54950 - }, - { - "epoch": 0.48586431867607277, - "grad_norm": 2.3021225929260254, - "learning_rate": 4.190226135539879e-05, - "loss": 0.6756, - "step": 54960 - }, - { - "epoch": 0.485952721936385, - "grad_norm": 1.4999538660049438, - "learning_rate": 4.190078796772692e-05, - "loss": 0.6641, - "step": 54970 - }, - { - "epoch": 0.48604112519669723, - "grad_norm": 7.899734020233154, - "learning_rate": 4.1899314580055046e-05, - "loss": 0.6603, - "step": 54980 - }, - { - "epoch": 0.4861295284570095, - "grad_norm": 4.906933784484863, - "learning_rate": 4.1897841192383174e-05, - "loss": 0.6396, - "step": 54990 - }, - { - "epoch": 0.48621793171732175, - "grad_norm": 3.425278902053833, - "learning_rate": 4.189636780471131e-05, - "loss": 0.6654, - "step": 55000 - }, - { - "epoch": 0.486306334977634, - "grad_norm": 3.697119951248169, - "learning_rate": 4.189489441703944e-05, - "loss": 0.7726, - "step": 55010 - }, - { - "epoch": 0.4863947382379462, - "grad_norm": 9.480390548706055, - "learning_rate": 4.1893421029367566e-05, - "loss": 0.7102, - "step": 55020 - }, - { - "epoch": 0.48648314149825844, - "grad_norm": 6.128053665161133, - "learning_rate": 4.1891947641695694e-05, - "loss": 0.6335, - "step": 55030 - }, - { - "epoch": 0.4865715447585707, - "grad_norm": 3.5200870037078857, - "learning_rate": 4.189047425402382e-05, - "loss": 0.5863, - "step": 55040 - }, - { - "epoch": 0.48665994801888296, - "grad_norm": 2.784259557723999, - "learning_rate": 4.188900086635195e-05, - "loss": 0.6824, - "step": 55050 - }, - { - "epoch": 0.4867483512791952, - "grad_norm": 6.356380939483643, - "learning_rate": 4.1887527478680086e-05, - "loss": 0.6934, - "step": 55060 - }, - { - "epoch": 0.4868367545395074, - "grad_norm": 2.7658591270446777, - "learning_rate": 4.1886054091008214e-05, - "loss": 0.6902, - "step": 55070 - }, - { - "epoch": 0.48692515779981965, - "grad_norm": 4.423125267028809, - "learning_rate": 4.188458070333634e-05, - "loss": 0.6749, - "step": 55080 - }, - { - "epoch": 0.4870135610601319, - "grad_norm": 4.625323295593262, - "learning_rate": 4.188310731566447e-05, - "loss": 0.7453, - "step": 55090 - }, - { - "epoch": 0.4871019643204441, - "grad_norm": 4.361440658569336, - "learning_rate": 4.18816339279926e-05, - "loss": 0.705, - "step": 55100 - }, - { - "epoch": 0.4871903675807564, - "grad_norm": 4.610135078430176, - "learning_rate": 4.188016054032073e-05, - "loss": 0.7953, - "step": 55110 - }, - { - "epoch": 0.48727877084106863, - "grad_norm": 2.346588373184204, - "learning_rate": 4.187868715264886e-05, - "loss": 0.867, - "step": 55120 - }, - { - "epoch": 0.48736717410138086, - "grad_norm": 5.302258491516113, - "learning_rate": 4.1877213764976985e-05, - "loss": 0.7313, - "step": 55130 - }, - { - "epoch": 0.4874555773616931, - "grad_norm": 5.039346218109131, - "learning_rate": 4.187574037730512e-05, - "loss": 0.6289, - "step": 55140 - }, - { - "epoch": 0.4875439806220053, - "grad_norm": 7.583291053771973, - "learning_rate": 4.187426698963325e-05, - "loss": 0.6106, - "step": 55150 - }, - { - "epoch": 0.48763238388231755, - "grad_norm": 2.6910438537597656, - "learning_rate": 4.1872793601961376e-05, - "loss": 0.7304, - "step": 55160 - }, - { - "epoch": 0.48772078714262984, - "grad_norm": 3.1498353481292725, - "learning_rate": 4.1871320214289505e-05, - "loss": 0.6721, - "step": 55170 - }, - { - "epoch": 0.48780919040294207, - "grad_norm": 4.106415748596191, - "learning_rate": 4.186984682661764e-05, - "loss": 0.7291, - "step": 55180 - }, - { - "epoch": 0.4878975936632543, - "grad_norm": 11.023696899414062, - "learning_rate": 4.186837343894576e-05, - "loss": 0.5497, - "step": 55190 - }, - { - "epoch": 0.48798599692356653, - "grad_norm": 2.3651418685913086, - "learning_rate": 4.1866900051273897e-05, - "loss": 0.6666, - "step": 55200 - }, - { - "epoch": 0.48807440018387876, - "grad_norm": 5.506601333618164, - "learning_rate": 4.186542666360202e-05, - "loss": 0.7184, - "step": 55210 - }, - { - "epoch": 0.488162803444191, - "grad_norm": 8.60173225402832, - "learning_rate": 4.186395327593015e-05, - "loss": 0.6957, - "step": 55220 - }, - { - "epoch": 0.4882512067045033, - "grad_norm": 2.281357765197754, - "learning_rate": 4.186247988825828e-05, - "loss": 0.6491, - "step": 55230 - }, - { - "epoch": 0.4883396099648155, - "grad_norm": 5.497363567352295, - "learning_rate": 4.186100650058641e-05, - "loss": 0.7456, - "step": 55240 - }, - { - "epoch": 0.48842801322512774, - "grad_norm": 6.958611965179443, - "learning_rate": 4.185953311291454e-05, - "loss": 0.6327, - "step": 55250 - }, - { - "epoch": 0.48851641648544, - "grad_norm": 6.949708938598633, - "learning_rate": 4.1858059725242674e-05, - "loss": 0.5946, - "step": 55260 - }, - { - "epoch": 0.4886048197457522, - "grad_norm": 2.0884857177734375, - "learning_rate": 4.1856586337570795e-05, - "loss": 0.8607, - "step": 55270 - }, - { - "epoch": 0.48869322300606444, - "grad_norm": 5.647246360778809, - "learning_rate": 4.185511294989893e-05, - "loss": 0.7551, - "step": 55280 - }, - { - "epoch": 0.4887816262663767, - "grad_norm": 1.4908417463302612, - "learning_rate": 4.185363956222706e-05, - "loss": 0.7701, - "step": 55290 - }, - { - "epoch": 0.48887002952668895, - "grad_norm": 1.7229188680648804, - "learning_rate": 4.185216617455519e-05, - "loss": 0.6892, - "step": 55300 - }, - { - "epoch": 0.4889584327870012, - "grad_norm": 3.431084632873535, - "learning_rate": 4.1850692786883315e-05, - "loss": 0.7167, - "step": 55310 - }, - { - "epoch": 0.4890468360473134, - "grad_norm": 2.7224199771881104, - "learning_rate": 4.1849219399211444e-05, - "loss": 0.6454, - "step": 55320 - }, - { - "epoch": 0.48913523930762565, - "grad_norm": 7.204508304595947, - "learning_rate": 4.184774601153957e-05, - "loss": 0.7286, - "step": 55330 - }, - { - "epoch": 0.48922364256793793, - "grad_norm": 8.568854331970215, - "learning_rate": 4.184627262386771e-05, - "loss": 0.6871, - "step": 55340 - }, - { - "epoch": 0.48931204582825016, - "grad_norm": 4.631820201873779, - "learning_rate": 4.184479923619583e-05, - "loss": 0.6819, - "step": 55350 - }, - { - "epoch": 0.4894004490885624, - "grad_norm": 7.1135640144348145, - "learning_rate": 4.1843325848523964e-05, - "loss": 0.8508, - "step": 55360 - }, - { - "epoch": 0.4894888523488746, - "grad_norm": 4.763911247253418, - "learning_rate": 4.184185246085209e-05, - "loss": 0.7262, - "step": 55370 - }, - { - "epoch": 0.48957725560918686, - "grad_norm": 1.350186824798584, - "learning_rate": 4.184037907318022e-05, - "loss": 0.6756, - "step": 55380 - }, - { - "epoch": 0.4896656588694991, - "grad_norm": 7.568343162536621, - "learning_rate": 4.183890568550835e-05, - "loss": 0.7142, - "step": 55390 - }, - { - "epoch": 0.4897540621298114, - "grad_norm": 7.722400188446045, - "learning_rate": 4.1837432297836484e-05, - "loss": 0.67, - "step": 55400 - }, - { - "epoch": 0.4898424653901236, - "grad_norm": 5.146885395050049, - "learning_rate": 4.1835958910164606e-05, - "loss": 0.6448, - "step": 55410 - }, - { - "epoch": 0.48993086865043584, - "grad_norm": 1.5060570240020752, - "learning_rate": 4.183448552249274e-05, - "loss": 0.7488, - "step": 55420 - }, - { - "epoch": 0.49001927191074807, - "grad_norm": 4.813604831695557, - "learning_rate": 4.183301213482086e-05, - "loss": 0.7058, - "step": 55430 - }, - { - "epoch": 0.4901076751710603, - "grad_norm": 3.104266405105591, - "learning_rate": 4.1831538747149e-05, - "loss": 0.7286, - "step": 55440 - }, - { - "epoch": 0.49019607843137253, - "grad_norm": 1.2457255125045776, - "learning_rate": 4.1830065359477126e-05, - "loss": 0.6328, - "step": 55450 - }, - { - "epoch": 0.4902844816916848, - "grad_norm": 2.3449554443359375, - "learning_rate": 4.1828591971805254e-05, - "loss": 0.6452, - "step": 55460 - }, - { - "epoch": 0.49037288495199705, - "grad_norm": 2.7399024963378906, - "learning_rate": 4.182711858413338e-05, - "loss": 0.8105, - "step": 55470 - }, - { - "epoch": 0.4904612882123093, - "grad_norm": 11.01954174041748, - "learning_rate": 4.182564519646152e-05, - "loss": 0.6609, - "step": 55480 - }, - { - "epoch": 0.4905496914726215, - "grad_norm": 7.126019477844238, - "learning_rate": 4.182417180878964e-05, - "loss": 0.775, - "step": 55490 - }, - { - "epoch": 0.49063809473293374, - "grad_norm": 2.2502033710479736, - "learning_rate": 4.1822698421117774e-05, - "loss": 0.5554, - "step": 55500 - }, - { - "epoch": 0.49072649799324597, - "grad_norm": 10.011443138122559, - "learning_rate": 4.18212250334459e-05, - "loss": 0.7572, - "step": 55510 - }, - { - "epoch": 0.49081490125355826, - "grad_norm": 2.1530823707580566, - "learning_rate": 4.181975164577403e-05, - "loss": 0.6739, - "step": 55520 - }, - { - "epoch": 0.4909033045138705, - "grad_norm": 13.02297306060791, - "learning_rate": 4.181827825810216e-05, - "loss": 0.7904, - "step": 55530 - }, - { - "epoch": 0.4909917077741827, - "grad_norm": 6.199241638183594, - "learning_rate": 4.1816804870430295e-05, - "loss": 0.6325, - "step": 55540 - }, - { - "epoch": 0.49108011103449495, - "grad_norm": 8.223472595214844, - "learning_rate": 4.1815331482758416e-05, - "loss": 0.6923, - "step": 55550 - }, - { - "epoch": 0.4911685142948072, - "grad_norm": 2.7837607860565186, - "learning_rate": 4.181385809508655e-05, - "loss": 0.7552, - "step": 55560 - }, - { - "epoch": 0.4912569175551194, - "grad_norm": 9.880025863647461, - "learning_rate": 4.181238470741467e-05, - "loss": 0.5899, - "step": 55570 - }, - { - "epoch": 0.4913453208154317, - "grad_norm": 8.495399475097656, - "learning_rate": 4.181091131974281e-05, - "loss": 0.6938, - "step": 55580 - }, - { - "epoch": 0.4914337240757439, - "grad_norm": 5.334437370300293, - "learning_rate": 4.1809437932070936e-05, - "loss": 0.7148, - "step": 55590 - }, - { - "epoch": 0.49152212733605616, - "grad_norm": 2.6492724418640137, - "learning_rate": 4.1807964544399065e-05, - "loss": 0.716, - "step": 55600 - }, - { - "epoch": 0.4916105305963684, - "grad_norm": 3.826448678970337, - "learning_rate": 4.180649115672719e-05, - "loss": 0.8247, - "step": 55610 - }, - { - "epoch": 0.4916989338566806, - "grad_norm": 2.4406564235687256, - "learning_rate": 4.180501776905533e-05, - "loss": 0.6665, - "step": 55620 - }, - { - "epoch": 0.49178733711699285, - "grad_norm": 1.7770804166793823, - "learning_rate": 4.180354438138345e-05, - "loss": 0.7198, - "step": 55630 - }, - { - "epoch": 0.49187574037730514, - "grad_norm": 2.9972198009490967, - "learning_rate": 4.1802070993711585e-05, - "loss": 0.6669, - "step": 55640 - }, - { - "epoch": 0.49196414363761737, - "grad_norm": 8.456132888793945, - "learning_rate": 4.180059760603971e-05, - "loss": 0.6704, - "step": 55650 - }, - { - "epoch": 0.4920525468979296, - "grad_norm": 10.448269844055176, - "learning_rate": 4.179912421836784e-05, - "loss": 0.646, - "step": 55660 - }, - { - "epoch": 0.49214095015824183, - "grad_norm": 5.333916664123535, - "learning_rate": 4.179765083069597e-05, - "loss": 0.5548, - "step": 55670 - }, - { - "epoch": 0.49222935341855406, - "grad_norm": 6.754070281982422, - "learning_rate": 4.17961774430241e-05, - "loss": 0.7663, - "step": 55680 - }, - { - "epoch": 0.4923177566788663, - "grad_norm": 9.926854133605957, - "learning_rate": 4.179470405535223e-05, - "loss": 0.6625, - "step": 55690 - }, - { - "epoch": 0.4924061599391786, - "grad_norm": 3.9071664810180664, - "learning_rate": 4.179323066768036e-05, - "loss": 0.7274, - "step": 55700 - }, - { - "epoch": 0.4924945631994908, - "grad_norm": 1.381919503211975, - "learning_rate": 4.179175728000848e-05, - "loss": 0.6246, - "step": 55710 - }, - { - "epoch": 0.49258296645980304, - "grad_norm": 7.704475402832031, - "learning_rate": 4.179028389233662e-05, - "loss": 0.7636, - "step": 55720 - }, - { - "epoch": 0.49267136972011527, - "grad_norm": 6.495126724243164, - "learning_rate": 4.178881050466475e-05, - "loss": 0.696, - "step": 55730 - }, - { - "epoch": 0.4927597729804275, - "grad_norm": 1.3690553903579712, - "learning_rate": 4.1787337116992875e-05, - "loss": 0.7047, - "step": 55740 - }, - { - "epoch": 0.49284817624073973, - "grad_norm": 2.840855598449707, - "learning_rate": 4.1785863729321004e-05, - "loss": 0.6929, - "step": 55750 - }, - { - "epoch": 0.492936579501052, - "grad_norm": 4.008294105529785, - "learning_rate": 4.178439034164914e-05, - "loss": 0.7485, - "step": 55760 - }, - { - "epoch": 0.49302498276136425, - "grad_norm": 0.9874984622001648, - "learning_rate": 4.178291695397726e-05, - "loss": 0.8122, - "step": 55770 - }, - { - "epoch": 0.4931133860216765, - "grad_norm": 3.377452850341797, - "learning_rate": 4.1781443566305395e-05, - "loss": 0.6806, - "step": 55780 - }, - { - "epoch": 0.4932017892819887, - "grad_norm": 1.7771506309509277, - "learning_rate": 4.1779970178633524e-05, - "loss": 0.6399, - "step": 55790 - }, - { - "epoch": 0.49329019254230094, - "grad_norm": 2.521798610687256, - "learning_rate": 4.177849679096165e-05, - "loss": 0.8092, - "step": 55800 - }, - { - "epoch": 0.4933785958026132, - "grad_norm": 3.044299840927124, - "learning_rate": 4.177702340328978e-05, - "loss": 0.6771, - "step": 55810 - }, - { - "epoch": 0.49346699906292546, - "grad_norm": 1.857763648033142, - "learning_rate": 4.177555001561791e-05, - "loss": 0.7844, - "step": 55820 - }, - { - "epoch": 0.4935554023232377, - "grad_norm": 4.613230228424072, - "learning_rate": 4.177407662794604e-05, - "loss": 0.7614, - "step": 55830 - }, - { - "epoch": 0.4936438055835499, - "grad_norm": 21.550128936767578, - "learning_rate": 4.177260324027417e-05, - "loss": 0.6442, - "step": 55840 - }, - { - "epoch": 0.49373220884386215, - "grad_norm": 1.7921669483184814, - "learning_rate": 4.17711298526023e-05, - "loss": 0.8092, - "step": 55850 - }, - { - "epoch": 0.4938206121041744, - "grad_norm": 1.795630931854248, - "learning_rate": 4.176965646493043e-05, - "loss": 0.672, - "step": 55860 - }, - { - "epoch": 0.49390901536448667, - "grad_norm": 4.374889850616455, - "learning_rate": 4.176818307725856e-05, - "loss": 0.6347, - "step": 55870 - }, - { - "epoch": 0.4939974186247989, - "grad_norm": 3.6233792304992676, - "learning_rate": 4.1766709689586686e-05, - "loss": 0.6676, - "step": 55880 - }, - { - "epoch": 0.49408582188511113, - "grad_norm": 14.791594505310059, - "learning_rate": 4.1765236301914814e-05, - "loss": 0.7522, - "step": 55890 - }, - { - "epoch": 0.49417422514542336, - "grad_norm": 9.890098571777344, - "learning_rate": 4.176376291424294e-05, - "loss": 0.7474, - "step": 55900 - }, - { - "epoch": 0.4942626284057356, - "grad_norm": 1.5646966695785522, - "learning_rate": 4.176228952657108e-05, - "loss": 0.6713, - "step": 55910 - }, - { - "epoch": 0.4943510316660478, - "grad_norm": 3.6969211101531982, - "learning_rate": 4.1760816138899206e-05, - "loss": 0.7218, - "step": 55920 - }, - { - "epoch": 0.4944394349263601, - "grad_norm": 1.3526560068130493, - "learning_rate": 4.1759342751227334e-05, - "loss": 0.7743, - "step": 55930 - }, - { - "epoch": 0.49452783818667234, - "grad_norm": 5.316917419433594, - "learning_rate": 4.175786936355546e-05, - "loss": 0.6966, - "step": 55940 - }, - { - "epoch": 0.4946162414469846, - "grad_norm": 4.94674825668335, - "learning_rate": 4.17563959758836e-05, - "loss": 0.7137, - "step": 55950 - }, - { - "epoch": 0.4947046447072968, - "grad_norm": 6.7031731605529785, - "learning_rate": 4.175492258821172e-05, - "loss": 0.8149, - "step": 55960 - }, - { - "epoch": 0.49479304796760903, - "grad_norm": 3.462838649749756, - "learning_rate": 4.1753449200539854e-05, - "loss": 0.7457, - "step": 55970 - }, - { - "epoch": 0.49488145122792127, - "grad_norm": 2.193026304244995, - "learning_rate": 4.175197581286798e-05, - "loss": 0.78, - "step": 55980 - }, - { - "epoch": 0.49496985448823355, - "grad_norm": 2.6484906673431396, - "learning_rate": 4.175050242519611e-05, - "loss": 0.5925, - "step": 55990 - }, - { - "epoch": 0.4950582577485458, - "grad_norm": 5.234287738800049, - "learning_rate": 4.174902903752424e-05, - "loss": 0.7181, - "step": 56000 - }, - { - "epoch": 0.495146661008858, - "grad_norm": 2.6760685443878174, - "learning_rate": 4.1747555649852375e-05, - "loss": 0.7013, - "step": 56010 - }, - { - "epoch": 0.49523506426917024, - "grad_norm": 2.5146236419677734, - "learning_rate": 4.1746082262180496e-05, - "loss": 0.7546, - "step": 56020 - }, - { - "epoch": 0.4953234675294825, - "grad_norm": 1.9755581617355347, - "learning_rate": 4.174460887450863e-05, - "loss": 0.7427, - "step": 56030 - }, - { - "epoch": 0.4954118707897947, - "grad_norm": 2.856619358062744, - "learning_rate": 4.174313548683675e-05, - "loss": 0.7434, - "step": 56040 - }, - { - "epoch": 0.495500274050107, - "grad_norm": 2.2519772052764893, - "learning_rate": 4.174166209916489e-05, - "loss": 0.6547, - "step": 56050 - }, - { - "epoch": 0.4955886773104192, - "grad_norm": 2.288954019546509, - "learning_rate": 4.1740188711493016e-05, - "loss": 0.7181, - "step": 56060 - }, - { - "epoch": 0.49567708057073145, - "grad_norm": 1.9550548791885376, - "learning_rate": 4.1738715323821145e-05, - "loss": 0.6374, - "step": 56070 - }, - { - "epoch": 0.4957654838310437, - "grad_norm": 3.3265535831451416, - "learning_rate": 4.173724193614927e-05, - "loss": 0.6593, - "step": 56080 - }, - { - "epoch": 0.4958538870913559, - "grad_norm": 1.2758525609970093, - "learning_rate": 4.173576854847741e-05, - "loss": 0.7149, - "step": 56090 - }, - { - "epoch": 0.49594229035166815, - "grad_norm": 3.466811180114746, - "learning_rate": 4.173429516080553e-05, - "loss": 0.6414, - "step": 56100 - }, - { - "epoch": 0.49603069361198043, - "grad_norm": 1.6507645845413208, - "learning_rate": 4.1732821773133665e-05, - "loss": 0.6309, - "step": 56110 - }, - { - "epoch": 0.49611909687229266, - "grad_norm": 1.9765758514404297, - "learning_rate": 4.173134838546179e-05, - "loss": 0.63, - "step": 56120 - }, - { - "epoch": 0.4962075001326049, - "grad_norm": 14.110745429992676, - "learning_rate": 4.172987499778992e-05, - "loss": 0.8376, - "step": 56130 - }, - { - "epoch": 0.4962959033929171, - "grad_norm": 2.5902881622314453, - "learning_rate": 4.172840161011805e-05, - "loss": 0.6001, - "step": 56140 - }, - { - "epoch": 0.49638430665322936, - "grad_norm": 1.9173308610916138, - "learning_rate": 4.172692822244618e-05, - "loss": 0.6811, - "step": 56150 - }, - { - "epoch": 0.4964727099135416, - "grad_norm": 1.4938806295394897, - "learning_rate": 4.172545483477431e-05, - "loss": 0.6975, - "step": 56160 - }, - { - "epoch": 0.4965611131738539, - "grad_norm": 3.743889331817627, - "learning_rate": 4.172398144710244e-05, - "loss": 0.678, - "step": 56170 - }, - { - "epoch": 0.4966495164341661, - "grad_norm": 3.569836378097534, - "learning_rate": 4.1722508059430563e-05, - "loss": 0.6603, - "step": 56180 - }, - { - "epoch": 0.49673791969447834, - "grad_norm": 5.14637565612793, - "learning_rate": 4.17210346717587e-05, - "loss": 0.7726, - "step": 56190 - }, - { - "epoch": 0.49682632295479057, - "grad_norm": 1.631245732307434, - "learning_rate": 4.171956128408683e-05, - "loss": 0.6322, - "step": 56200 - }, - { - "epoch": 0.4969147262151028, - "grad_norm": 1.28780996799469, - "learning_rate": 4.1718087896414955e-05, - "loss": 0.7523, - "step": 56210 - }, - { - "epoch": 0.49700312947541503, - "grad_norm": 3.152726411819458, - "learning_rate": 4.1716614508743084e-05, - "loss": 0.5857, - "step": 56220 - }, - { - "epoch": 0.4970915327357273, - "grad_norm": 8.108668327331543, - "learning_rate": 4.171514112107122e-05, - "loss": 0.7438, - "step": 56230 - }, - { - "epoch": 0.49717993599603955, - "grad_norm": 3.006171703338623, - "learning_rate": 4.171366773339934e-05, - "loss": 0.6027, - "step": 56240 - }, - { - "epoch": 0.4972683392563518, - "grad_norm": 8.034201622009277, - "learning_rate": 4.1712194345727475e-05, - "loss": 0.6328, - "step": 56250 - }, - { - "epoch": 0.497356742516664, - "grad_norm": 7.66131591796875, - "learning_rate": 4.17107209580556e-05, - "loss": 0.6001, - "step": 56260 - }, - { - "epoch": 0.49744514577697624, - "grad_norm": 1.8868650197982788, - "learning_rate": 4.170924757038373e-05, - "loss": 0.5932, - "step": 56270 - }, - { - "epoch": 0.49753354903728847, - "grad_norm": 4.654658317565918, - "learning_rate": 4.170777418271186e-05, - "loss": 0.6393, - "step": 56280 - }, - { - "epoch": 0.49762195229760076, - "grad_norm": 3.2896487712860107, - "learning_rate": 4.170630079503999e-05, - "loss": 0.7229, - "step": 56290 - }, - { - "epoch": 0.497710355557913, - "grad_norm": 3.0114638805389404, - "learning_rate": 4.170482740736812e-05, - "loss": 0.661, - "step": 56300 - }, - { - "epoch": 0.4977987588182252, - "grad_norm": 1.7431437969207764, - "learning_rate": 4.170335401969625e-05, - "loss": 0.6516, - "step": 56310 - }, - { - "epoch": 0.49788716207853745, - "grad_norm": 1.5742127895355225, - "learning_rate": 4.1701880632024374e-05, - "loss": 0.5967, - "step": 56320 - }, - { - "epoch": 0.4979755653388497, - "grad_norm": 3.6714813709259033, - "learning_rate": 4.170040724435251e-05, - "loss": 0.6864, - "step": 56330 - }, - { - "epoch": 0.4980639685991619, - "grad_norm": 3.167954683303833, - "learning_rate": 4.169893385668064e-05, - "loss": 0.7768, - "step": 56340 - }, - { - "epoch": 0.4981523718594742, - "grad_norm": 3.3005383014678955, - "learning_rate": 4.1697460469008766e-05, - "loss": 0.8772, - "step": 56350 - }, - { - "epoch": 0.4982407751197864, - "grad_norm": 3.6041250228881836, - "learning_rate": 4.1695987081336894e-05, - "loss": 0.9309, - "step": 56360 - }, - { - "epoch": 0.49832917838009866, - "grad_norm": 1.3319722414016724, - "learning_rate": 4.169451369366503e-05, - "loss": 0.6713, - "step": 56370 - }, - { - "epoch": 0.4984175816404109, - "grad_norm": 6.420892715454102, - "learning_rate": 4.169304030599315e-05, - "loss": 0.6064, - "step": 56380 - }, - { - "epoch": 0.4985059849007231, - "grad_norm": 12.575167655944824, - "learning_rate": 4.1691566918321286e-05, - "loss": 0.7888, - "step": 56390 - }, - { - "epoch": 0.4985943881610354, - "grad_norm": 3.8580946922302246, - "learning_rate": 4.169009353064941e-05, - "loss": 0.7808, - "step": 56400 - }, - { - "epoch": 0.49868279142134764, - "grad_norm": 2.1893560886383057, - "learning_rate": 4.168862014297754e-05, - "loss": 0.7169, - "step": 56410 - }, - { - "epoch": 0.49877119468165987, - "grad_norm": 3.957118511199951, - "learning_rate": 4.168714675530567e-05, - "loss": 0.7767, - "step": 56420 - }, - { - "epoch": 0.4988595979419721, - "grad_norm": 2.7286252975463867, - "learning_rate": 4.16856733676338e-05, - "loss": 0.7691, - "step": 56430 - }, - { - "epoch": 0.49894800120228433, - "grad_norm": 1.7598273754119873, - "learning_rate": 4.168419997996193e-05, - "loss": 0.6991, - "step": 56440 - }, - { - "epoch": 0.49903640446259656, - "grad_norm": 2.537155866622925, - "learning_rate": 4.168272659229006e-05, - "loss": 0.7169, - "step": 56450 - }, - { - "epoch": 0.49912480772290885, - "grad_norm": 10.953792572021484, - "learning_rate": 4.1681253204618184e-05, - "loss": 0.8031, - "step": 56460 - }, - { - "epoch": 0.4992132109832211, - "grad_norm": 4.062319755554199, - "learning_rate": 4.167977981694632e-05, - "loss": 0.5463, - "step": 56470 - }, - { - "epoch": 0.4993016142435333, - "grad_norm": 7.464442253112793, - "learning_rate": 4.167830642927445e-05, - "loss": 0.7858, - "step": 56480 - }, - { - "epoch": 0.49939001750384554, - "grad_norm": 7.360393047332764, - "learning_rate": 4.1676833041602576e-05, - "loss": 0.6219, - "step": 56490 - }, - { - "epoch": 0.49947842076415777, - "grad_norm": 3.858891248703003, - "learning_rate": 4.1675359653930705e-05, - "loss": 0.6408, - "step": 56500 - }, - { - "epoch": 0.49956682402447, - "grad_norm": 4.827006816864014, - "learning_rate": 4.167388626625883e-05, - "loss": 0.7617, - "step": 56510 - }, - { - "epoch": 0.4996552272847823, - "grad_norm": 1.9536699056625366, - "learning_rate": 4.167241287858696e-05, - "loss": 0.7106, - "step": 56520 - }, - { - "epoch": 0.4997436305450945, - "grad_norm": 6.272826671600342, - "learning_rate": 4.1670939490915096e-05, - "loss": 0.7382, - "step": 56530 - }, - { - "epoch": 0.49983203380540675, - "grad_norm": 3.2847611904144287, - "learning_rate": 4.166946610324322e-05, - "loss": 0.6765, - "step": 56540 - }, - { - "epoch": 0.499920437065719, - "grad_norm": 4.158660888671875, - "learning_rate": 4.166799271557135e-05, - "loss": 0.7309, - "step": 56550 - }, - { - "epoch": 0.5000088403260312, - "grad_norm": 3.1422619819641113, - "learning_rate": 4.166651932789948e-05, - "loss": 0.6606, - "step": 56560 - }, - { - "epoch": 0.5000972435863434, - "grad_norm": 5.493398189544678, - "learning_rate": 4.166504594022761e-05, - "loss": 0.7619, - "step": 56570 - }, - { - "epoch": 0.5001856468466557, - "grad_norm": 3.35305118560791, - "learning_rate": 4.166357255255574e-05, - "loss": 0.7336, - "step": 56580 - }, - { - "epoch": 0.5002740501069679, - "grad_norm": 1.8183269500732422, - "learning_rate": 4.166209916488387e-05, - "loss": 0.6442, - "step": 56590 - }, - { - "epoch": 0.5003624533672801, - "grad_norm": 8.141581535339355, - "learning_rate": 4.1660625777211995e-05, - "loss": 0.6402, - "step": 56600 - }, - { - "epoch": 0.5004508566275925, - "grad_norm": 3.1588070392608643, - "learning_rate": 4.165915238954013e-05, - "loss": 0.713, - "step": 56610 - }, - { - "epoch": 0.5005392598879047, - "grad_norm": 2.835822820663452, - "learning_rate": 4.165767900186825e-05, - "loss": 0.7602, - "step": 56620 - }, - { - "epoch": 0.5006276631482169, - "grad_norm": 8.367727279663086, - "learning_rate": 4.165620561419639e-05, - "loss": 0.7216, - "step": 56630 - }, - { - "epoch": 0.5007160664085292, - "grad_norm": 2.2879111766815186, - "learning_rate": 4.1654732226524515e-05, - "loss": 0.5942, - "step": 56640 - }, - { - "epoch": 0.5008044696688414, - "grad_norm": 2.3880019187927246, - "learning_rate": 4.1653258838852643e-05, - "loss": 0.712, - "step": 56650 - }, - { - "epoch": 0.5008928729291536, - "grad_norm": 2.2284128665924072, - "learning_rate": 4.165178545118077e-05, - "loss": 0.6762, - "step": 56660 - }, - { - "epoch": 0.5009812761894659, - "grad_norm": 19.109281539916992, - "learning_rate": 4.165031206350891e-05, - "loss": 0.5511, - "step": 56670 - }, - { - "epoch": 0.5010696794497781, - "grad_norm": 3.4032065868377686, - "learning_rate": 4.1648838675837035e-05, - "loss": 0.5864, - "step": 56680 - }, - { - "epoch": 0.5011580827100903, - "grad_norm": 4.226108551025391, - "learning_rate": 4.1647365288165164e-05, - "loss": 0.7623, - "step": 56690 - }, - { - "epoch": 0.5012464859704026, - "grad_norm": 6.362026214599609, - "learning_rate": 4.164589190049329e-05, - "loss": 0.7535, - "step": 56700 - }, - { - "epoch": 0.5013348892307148, - "grad_norm": 5.453469276428223, - "learning_rate": 4.164441851282142e-05, - "loss": 0.619, - "step": 56710 - }, - { - "epoch": 0.501423292491027, - "grad_norm": 8.311491966247559, - "learning_rate": 4.164294512514955e-05, - "loss": 0.8121, - "step": 56720 - }, - { - "epoch": 0.5015116957513394, - "grad_norm": 3.5187137126922607, - "learning_rate": 4.164147173747768e-05, - "loss": 0.6545, - "step": 56730 - }, - { - "epoch": 0.5016000990116516, - "grad_norm": 3.2361950874328613, - "learning_rate": 4.163999834980581e-05, - "loss": 0.6789, - "step": 56740 - }, - { - "epoch": 0.5016885022719638, - "grad_norm": 2.4146649837493896, - "learning_rate": 4.163852496213394e-05, - "loss": 0.6575, - "step": 56750 - }, - { - "epoch": 0.501776905532276, - "grad_norm": 4.072633266448975, - "learning_rate": 4.163705157446207e-05, - "loss": 0.7072, - "step": 56760 - }, - { - "epoch": 0.5018653087925883, - "grad_norm": 2.62351393699646, - "learning_rate": 4.16355781867902e-05, - "loss": 0.7861, - "step": 56770 - }, - { - "epoch": 0.5019537120529005, - "grad_norm": 1.3204588890075684, - "learning_rate": 4.1634104799118326e-05, - "loss": 0.6465, - "step": 56780 - }, - { - "epoch": 0.5020421153132127, - "grad_norm": 2.0702619552612305, - "learning_rate": 4.1632631411446454e-05, - "loss": 0.5643, - "step": 56790 - }, - { - "epoch": 0.502130518573525, - "grad_norm": 11.134904861450195, - "learning_rate": 4.163115802377459e-05, - "loss": 0.6589, - "step": 56800 - }, - { - "epoch": 0.5022189218338372, - "grad_norm": 12.244222640991211, - "learning_rate": 4.162968463610272e-05, - "loss": 0.8243, - "step": 56810 - }, - { - "epoch": 0.5023073250941494, - "grad_norm": 1.670577049255371, - "learning_rate": 4.1628211248430846e-05, - "loss": 0.6299, - "step": 56820 - }, - { - "epoch": 0.5023957283544617, - "grad_norm": 5.465409755706787, - "learning_rate": 4.1626737860758974e-05, - "loss": 0.8664, - "step": 56830 - }, - { - "epoch": 0.5024841316147739, - "grad_norm": 3.7542972564697266, - "learning_rate": 4.16252644730871e-05, - "loss": 0.7647, - "step": 56840 - }, - { - "epoch": 0.5025725348750862, - "grad_norm": 5.315617084503174, - "learning_rate": 4.162379108541523e-05, - "loss": 0.6223, - "step": 56850 - }, - { - "epoch": 0.5026609381353985, - "grad_norm": 9.695874214172363, - "learning_rate": 4.1622317697743366e-05, - "loss": 0.6954, - "step": 56860 - }, - { - "epoch": 0.5027493413957107, - "grad_norm": 3.826537847518921, - "learning_rate": 4.162084431007149e-05, - "loss": 0.6427, - "step": 56870 - }, - { - "epoch": 0.5028377446560229, - "grad_norm": 2.182504415512085, - "learning_rate": 4.161937092239962e-05, - "loss": 0.8364, - "step": 56880 - }, - { - "epoch": 0.5029261479163352, - "grad_norm": 3.3862853050231934, - "learning_rate": 4.161789753472775e-05, - "loss": 0.6931, - "step": 56890 - }, - { - "epoch": 0.5030145511766474, - "grad_norm": 3.5248701572418213, - "learning_rate": 4.161642414705588e-05, - "loss": 0.6316, - "step": 56900 - }, - { - "epoch": 0.5031029544369596, - "grad_norm": 8.183309555053711, - "learning_rate": 4.161495075938401e-05, - "loss": 0.724, - "step": 56910 - }, - { - "epoch": 0.5031913576972719, - "grad_norm": 3.988558530807495, - "learning_rate": 4.161347737171214e-05, - "loss": 0.7791, - "step": 56920 - }, - { - "epoch": 0.5032797609575841, - "grad_norm": 4.229586124420166, - "learning_rate": 4.1612003984040264e-05, - "loss": 0.5944, - "step": 56930 - }, - { - "epoch": 0.5033681642178963, - "grad_norm": 1.4051169157028198, - "learning_rate": 4.16105305963684e-05, - "loss": 0.6589, - "step": 56940 - }, - { - "epoch": 0.5034565674782085, - "grad_norm": 1.8541871309280396, - "learning_rate": 4.160905720869653e-05, - "loss": 0.61, - "step": 56950 - }, - { - "epoch": 0.5035449707385209, - "grad_norm": 2.2848703861236572, - "learning_rate": 4.1607583821024656e-05, - "loss": 0.6376, - "step": 56960 - }, - { - "epoch": 0.5036333739988331, - "grad_norm": 12.388575553894043, - "learning_rate": 4.1606110433352785e-05, - "loss": 0.6717, - "step": 56970 - }, - { - "epoch": 0.5037217772591454, - "grad_norm": 1.0625922679901123, - "learning_rate": 4.160463704568091e-05, - "loss": 0.5748, - "step": 56980 - }, - { - "epoch": 0.5038101805194576, - "grad_norm": 4.856298446655273, - "learning_rate": 4.160316365800904e-05, - "loss": 0.793, - "step": 56990 - }, - { - "epoch": 0.5038985837797698, - "grad_norm": 7.612451076507568, - "learning_rate": 4.1601690270337177e-05, - "loss": 0.7467, - "step": 57000 - }, - { - "epoch": 0.503986987040082, - "grad_norm": 6.634158611297607, - "learning_rate": 4.16002168826653e-05, - "loss": 0.8421, - "step": 57010 - }, - { - "epoch": 0.5040753903003943, - "grad_norm": 2.936037302017212, - "learning_rate": 4.159874349499343e-05, - "loss": 0.5945, - "step": 57020 - }, - { - "epoch": 0.5041637935607065, - "grad_norm": 5.316231727600098, - "learning_rate": 4.159727010732156e-05, - "loss": 0.7688, - "step": 57030 - }, - { - "epoch": 0.5042521968210187, - "grad_norm": 8.598160743713379, - "learning_rate": 4.159579671964969e-05, - "loss": 0.8109, - "step": 57040 - }, - { - "epoch": 0.504340600081331, - "grad_norm": 3.372987747192383, - "learning_rate": 4.159432333197782e-05, - "loss": 0.7786, - "step": 57050 - }, - { - "epoch": 0.5044290033416432, - "grad_norm": 3.2942261695861816, - "learning_rate": 4.1592849944305953e-05, - "loss": 0.6363, - "step": 57060 - }, - { - "epoch": 0.5045174066019554, - "grad_norm": 4.1155781745910645, - "learning_rate": 4.1591376556634075e-05, - "loss": 0.631, - "step": 57070 - }, - { - "epoch": 0.5046058098622678, - "grad_norm": 8.191411018371582, - "learning_rate": 4.158990316896221e-05, - "loss": 0.6696, - "step": 57080 - }, - { - "epoch": 0.50469421312258, - "grad_norm": 1.8772951364517212, - "learning_rate": 4.158842978129033e-05, - "loss": 0.6356, - "step": 57090 - }, - { - "epoch": 0.5047826163828922, - "grad_norm": 2.4823923110961914, - "learning_rate": 4.158695639361847e-05, - "loss": 0.7862, - "step": 57100 - }, - { - "epoch": 0.5048710196432045, - "grad_norm": 5.776914119720459, - "learning_rate": 4.1585483005946595e-05, - "loss": 0.7658, - "step": 57110 - }, - { - "epoch": 0.5049594229035167, - "grad_norm": 5.254019737243652, - "learning_rate": 4.1584009618274724e-05, - "loss": 0.6319, - "step": 57120 - }, - { - "epoch": 0.5050478261638289, - "grad_norm": 3.2729878425598145, - "learning_rate": 4.158253623060285e-05, - "loss": 0.7331, - "step": 57130 - }, - { - "epoch": 0.5051362294241412, - "grad_norm": 2.2706246376037598, - "learning_rate": 4.158106284293099e-05, - "loss": 0.7267, - "step": 57140 - }, - { - "epoch": 0.5052246326844534, - "grad_norm": 4.088400363922119, - "learning_rate": 4.157958945525911e-05, - "loss": 0.7926, - "step": 57150 - }, - { - "epoch": 0.5053130359447656, - "grad_norm": 3.899329423904419, - "learning_rate": 4.1578116067587244e-05, - "loss": 0.6409, - "step": 57160 - }, - { - "epoch": 0.5054014392050779, - "grad_norm": 2.1385231018066406, - "learning_rate": 4.157664267991537e-05, - "loss": 0.6898, - "step": 57170 - }, - { - "epoch": 0.5054898424653901, - "grad_norm": 7.217191696166992, - "learning_rate": 4.15751692922435e-05, - "loss": 0.6133, - "step": 57180 - }, - { - "epoch": 0.5055782457257023, - "grad_norm": 2.897397994995117, - "learning_rate": 4.157369590457163e-05, - "loss": 0.6874, - "step": 57190 - }, - { - "epoch": 0.5056666489860147, - "grad_norm": 3.4928135871887207, - "learning_rate": 4.157222251689976e-05, - "loss": 0.6597, - "step": 57200 - }, - { - "epoch": 0.5057550522463269, - "grad_norm": 4.433698654174805, - "learning_rate": 4.1570749129227885e-05, - "loss": 0.7171, - "step": 57210 - }, - { - "epoch": 0.5058434555066391, - "grad_norm": 5.090980529785156, - "learning_rate": 4.156927574155602e-05, - "loss": 0.6595, - "step": 57220 - }, - { - "epoch": 0.5059318587669513, - "grad_norm": 3.493039131164551, - "learning_rate": 4.156780235388414e-05, - "loss": 0.6937, - "step": 57230 - }, - { - "epoch": 0.5060202620272636, - "grad_norm": 3.3215017318725586, - "learning_rate": 4.156632896621228e-05, - "loss": 0.698, - "step": 57240 - }, - { - "epoch": 0.5061086652875758, - "grad_norm": 3.170927047729492, - "learning_rate": 4.1564855578540406e-05, - "loss": 0.7577, - "step": 57250 - }, - { - "epoch": 0.506197068547888, - "grad_norm": 2.4635794162750244, - "learning_rate": 4.1563382190868534e-05, - "loss": 0.6094, - "step": 57260 - }, - { - "epoch": 0.5062854718082003, - "grad_norm": 4.042263031005859, - "learning_rate": 4.156190880319666e-05, - "loss": 0.6449, - "step": 57270 - }, - { - "epoch": 0.5063738750685125, - "grad_norm": 1.4860950708389282, - "learning_rate": 4.15604354155248e-05, - "loss": 0.6807, - "step": 57280 - }, - { - "epoch": 0.5064622783288247, - "grad_norm": 2.3431918621063232, - "learning_rate": 4.155896202785292e-05, - "loss": 0.7994, - "step": 57290 - }, - { - "epoch": 0.506550681589137, - "grad_norm": 2.3435821533203125, - "learning_rate": 4.1557488640181054e-05, - "loss": 0.5329, - "step": 57300 - }, - { - "epoch": 0.5066390848494492, - "grad_norm": 6.458644866943359, - "learning_rate": 4.155601525250918e-05, - "loss": 0.7369, - "step": 57310 - }, - { - "epoch": 0.5067274881097615, - "grad_norm": 1.1299951076507568, - "learning_rate": 4.155454186483731e-05, - "loss": 0.7584, - "step": 57320 - }, - { - "epoch": 0.5068158913700738, - "grad_norm": 1.9832658767700195, - "learning_rate": 4.155306847716544e-05, - "loss": 0.6646, - "step": 57330 - }, - { - "epoch": 0.506904294630386, - "grad_norm": 4.427059173583984, - "learning_rate": 4.155159508949357e-05, - "loss": 0.6112, - "step": 57340 - }, - { - "epoch": 0.5069926978906982, - "grad_norm": 6.744326591491699, - "learning_rate": 4.1550121701821696e-05, - "loss": 0.7796, - "step": 57350 - }, - { - "epoch": 0.5070811011510105, - "grad_norm": 2.2644999027252197, - "learning_rate": 4.154864831414983e-05, - "loss": 0.6926, - "step": 57360 - }, - { - "epoch": 0.5071695044113227, - "grad_norm": 2.1050004959106445, - "learning_rate": 4.154717492647795e-05, - "loss": 0.7545, - "step": 57370 - }, - { - "epoch": 0.5072579076716349, - "grad_norm": 4.931227207183838, - "learning_rate": 4.154570153880609e-05, - "loss": 0.6872, - "step": 57380 - }, - { - "epoch": 0.5073463109319472, - "grad_norm": 6.39907693862915, - "learning_rate": 4.1544228151134216e-05, - "loss": 0.638, - "step": 57390 - }, - { - "epoch": 0.5074347141922594, - "grad_norm": 7.43491792678833, - "learning_rate": 4.1542754763462345e-05, - "loss": 0.7172, - "step": 57400 - }, - { - "epoch": 0.5075231174525716, - "grad_norm": 3.913090944290161, - "learning_rate": 4.154128137579047e-05, - "loss": 0.6168, - "step": 57410 - }, - { - "epoch": 0.5076115207128838, - "grad_norm": 15.152998924255371, - "learning_rate": 4.153980798811861e-05, - "loss": 0.7492, - "step": 57420 - }, - { - "epoch": 0.5076999239731961, - "grad_norm": 10.227533340454102, - "learning_rate": 4.153833460044673e-05, - "loss": 0.7735, - "step": 57430 - }, - { - "epoch": 0.5077883272335084, - "grad_norm": 2.000770330429077, - "learning_rate": 4.1536861212774865e-05, - "loss": 0.5962, - "step": 57440 - }, - { - "epoch": 0.5078767304938206, - "grad_norm": 1.3782382011413574, - "learning_rate": 4.1535387825102986e-05, - "loss": 0.7818, - "step": 57450 - }, - { - "epoch": 0.5079651337541329, - "grad_norm": 2.868053436279297, - "learning_rate": 4.153391443743112e-05, - "loss": 0.7098, - "step": 57460 - }, - { - "epoch": 0.5080535370144451, - "grad_norm": 3.6339824199676514, - "learning_rate": 4.153244104975925e-05, - "loss": 0.6535, - "step": 57470 - }, - { - "epoch": 0.5081419402747573, - "grad_norm": 3.4667751789093018, - "learning_rate": 4.153096766208738e-05, - "loss": 0.6274, - "step": 57480 - }, - { - "epoch": 0.5082303435350696, - "grad_norm": 2.1609416007995605, - "learning_rate": 4.1529494274415507e-05, - "loss": 0.7642, - "step": 57490 - }, - { - "epoch": 0.5083187467953818, - "grad_norm": 1.7735949754714966, - "learning_rate": 4.152802088674364e-05, - "loss": 0.6719, - "step": 57500 - }, - { - "epoch": 0.508407150055694, - "grad_norm": 3.4717016220092773, - "learning_rate": 4.152654749907176e-05, - "loss": 0.6177, - "step": 57510 - }, - { - "epoch": 0.5084955533160063, - "grad_norm": 1.4515308141708374, - "learning_rate": 4.15250741113999e-05, - "loss": 0.6885, - "step": 57520 - }, - { - "epoch": 0.5085839565763185, - "grad_norm": 2.896169662475586, - "learning_rate": 4.152360072372803e-05, - "loss": 0.6789, - "step": 57530 - }, - { - "epoch": 0.5086723598366307, - "grad_norm": 6.473743915557861, - "learning_rate": 4.1522127336056155e-05, - "loss": 0.657, - "step": 57540 - }, - { - "epoch": 0.5087607630969431, - "grad_norm": 2.919818878173828, - "learning_rate": 4.1520653948384283e-05, - "loss": 0.6598, - "step": 57550 - }, - { - "epoch": 0.5088491663572553, - "grad_norm": 4.229133605957031, - "learning_rate": 4.151918056071241e-05, - "loss": 0.806, - "step": 57560 - }, - { - "epoch": 0.5089375696175675, - "grad_norm": 10.783143997192383, - "learning_rate": 4.151770717304054e-05, - "loss": 0.6289, - "step": 57570 - }, - { - "epoch": 0.5090259728778798, - "grad_norm": 1.7085390090942383, - "learning_rate": 4.1516233785368675e-05, - "loss": 0.6389, - "step": 57580 - }, - { - "epoch": 0.509114376138192, - "grad_norm": 5.904126167297363, - "learning_rate": 4.1514760397696804e-05, - "loss": 0.81, - "step": 57590 - }, - { - "epoch": 0.5092027793985042, - "grad_norm": 3.2074472904205322, - "learning_rate": 4.151328701002493e-05, - "loss": 0.6909, - "step": 57600 - }, - { - "epoch": 0.5092911826588165, - "grad_norm": 6.975406646728516, - "learning_rate": 4.151181362235306e-05, - "loss": 0.7315, - "step": 57610 - }, - { - "epoch": 0.5093795859191287, - "grad_norm": 1.1402912139892578, - "learning_rate": 4.151034023468119e-05, - "loss": 0.8062, - "step": 57620 - }, - { - "epoch": 0.5094679891794409, - "grad_norm": 4.790241241455078, - "learning_rate": 4.150886684700932e-05, - "loss": 0.6636, - "step": 57630 - }, - { - "epoch": 0.5095563924397531, - "grad_norm": 5.190394401550293, - "learning_rate": 4.150739345933745e-05, - "loss": 0.5493, - "step": 57640 - }, - { - "epoch": 0.5096447957000654, - "grad_norm": 1.768310785293579, - "learning_rate": 4.150592007166558e-05, - "loss": 0.7117, - "step": 57650 - }, - { - "epoch": 0.5097331989603776, - "grad_norm": 4.656040668487549, - "learning_rate": 4.150444668399371e-05, - "loss": 0.7457, - "step": 57660 - }, - { - "epoch": 0.50982160222069, - "grad_norm": 19.833181381225586, - "learning_rate": 4.150297329632184e-05, - "loss": 0.6536, - "step": 57670 - }, - { - "epoch": 0.5099100054810022, - "grad_norm": 3.820453405380249, - "learning_rate": 4.1501499908649966e-05, - "loss": 0.7856, - "step": 57680 - }, - { - "epoch": 0.5099984087413144, - "grad_norm": 5.237705230712891, - "learning_rate": 4.1500026520978094e-05, - "loss": 0.6445, - "step": 57690 - }, - { - "epoch": 0.5100868120016266, - "grad_norm": 3.4213385581970215, - "learning_rate": 4.149855313330622e-05, - "loss": 0.7726, - "step": 57700 - }, - { - "epoch": 0.5101752152619389, - "grad_norm": 6.883492946624756, - "learning_rate": 4.149707974563436e-05, - "loss": 0.6229, - "step": 57710 - }, - { - "epoch": 0.5102636185222511, - "grad_norm": 2.100466251373291, - "learning_rate": 4.1495606357962486e-05, - "loss": 0.6373, - "step": 57720 - }, - { - "epoch": 0.5103520217825633, - "grad_norm": 1.392956018447876, - "learning_rate": 4.1494132970290614e-05, - "loss": 0.548, - "step": 57730 - }, - { - "epoch": 0.5104404250428756, - "grad_norm": 4.357324600219727, - "learning_rate": 4.149265958261874e-05, - "loss": 0.7606, - "step": 57740 - }, - { - "epoch": 0.5105288283031878, - "grad_norm": 11.297916412353516, - "learning_rate": 4.149118619494687e-05, - "loss": 0.7159, - "step": 57750 - }, - { - "epoch": 0.5106172315635, - "grad_norm": 2.839669704437256, - "learning_rate": 4.1489712807275e-05, - "loss": 0.742, - "step": 57760 - }, - { - "epoch": 0.5107056348238123, - "grad_norm": 1.8932141065597534, - "learning_rate": 4.1488239419603134e-05, - "loss": 0.7786, - "step": 57770 - }, - { - "epoch": 0.5107940380841245, - "grad_norm": 4.7063374519348145, - "learning_rate": 4.148676603193126e-05, - "loss": 0.8112, - "step": 57780 - }, - { - "epoch": 0.5108824413444368, - "grad_norm": 1.873314619064331, - "learning_rate": 4.148529264425939e-05, - "loss": 0.6394, - "step": 57790 - }, - { - "epoch": 0.5109708446047491, - "grad_norm": 5.169839382171631, - "learning_rate": 4.148381925658752e-05, - "loss": 0.7142, - "step": 57800 - }, - { - "epoch": 0.5110592478650613, - "grad_norm": 4.607201099395752, - "learning_rate": 4.148234586891565e-05, - "loss": 0.683, - "step": 57810 - }, - { - "epoch": 0.5111476511253735, - "grad_norm": 9.959545135498047, - "learning_rate": 4.1480872481243776e-05, - "loss": 0.7761, - "step": 57820 - }, - { - "epoch": 0.5112360543856858, - "grad_norm": 1.661966323852539, - "learning_rate": 4.147939909357191e-05, - "loss": 0.7066, - "step": 57830 - }, - { - "epoch": 0.511324457645998, - "grad_norm": 4.903796195983887, - "learning_rate": 4.147792570590003e-05, - "loss": 0.6152, - "step": 57840 - }, - { - "epoch": 0.5114128609063102, - "grad_norm": 14.816862106323242, - "learning_rate": 4.147645231822817e-05, - "loss": 0.7046, - "step": 57850 - }, - { - "epoch": 0.5115012641666224, - "grad_norm": 2.8706767559051514, - "learning_rate": 4.1474978930556296e-05, - "loss": 0.7634, - "step": 57860 - }, - { - "epoch": 0.5115896674269347, - "grad_norm": 2.605053424835205, - "learning_rate": 4.1473505542884425e-05, - "loss": 0.6965, - "step": 57870 - }, - { - "epoch": 0.5116780706872469, - "grad_norm": 2.3393845558166504, - "learning_rate": 4.147203215521255e-05, - "loss": 0.6555, - "step": 57880 - }, - { - "epoch": 0.5117664739475591, - "grad_norm": 4.712070941925049, - "learning_rate": 4.147055876754069e-05, - "loss": 0.6605, - "step": 57890 - }, - { - "epoch": 0.5118548772078714, - "grad_norm": 3.3680405616760254, - "learning_rate": 4.146908537986881e-05, - "loss": 0.8241, - "step": 57900 - }, - { - "epoch": 0.5119432804681837, - "grad_norm": 8.827312469482422, - "learning_rate": 4.1467611992196945e-05, - "loss": 0.6417, - "step": 57910 - }, - { - "epoch": 0.512031683728496, - "grad_norm": 11.913137435913086, - "learning_rate": 4.1466138604525066e-05, - "loss": 0.6392, - "step": 57920 - }, - { - "epoch": 0.5121200869888082, - "grad_norm": 2.735583543777466, - "learning_rate": 4.14646652168532e-05, - "loss": 0.74, - "step": 57930 - }, - { - "epoch": 0.5122084902491204, - "grad_norm": 6.679357051849365, - "learning_rate": 4.146319182918133e-05, - "loss": 0.6854, - "step": 57940 - }, - { - "epoch": 0.5122968935094326, - "grad_norm": 5.886544227600098, - "learning_rate": 4.146171844150946e-05, - "loss": 0.7648, - "step": 57950 - }, - { - "epoch": 0.5123852967697449, - "grad_norm": 7.434231758117676, - "learning_rate": 4.1460245053837587e-05, - "loss": 0.6851, - "step": 57960 - }, - { - "epoch": 0.5124737000300571, - "grad_norm": 5.318103790283203, - "learning_rate": 4.145877166616572e-05, - "loss": 0.6382, - "step": 57970 - }, - { - "epoch": 0.5125621032903693, - "grad_norm": 3.052842140197754, - "learning_rate": 4.145729827849384e-05, - "loss": 0.8278, - "step": 57980 - }, - { - "epoch": 0.5126505065506816, - "grad_norm": 4.2541632652282715, - "learning_rate": 4.145582489082198e-05, - "loss": 0.6927, - "step": 57990 - }, - { - "epoch": 0.5127389098109938, - "grad_norm": 3.4220988750457764, - "learning_rate": 4.145435150315011e-05, - "loss": 0.6728, - "step": 58000 - }, - { - "epoch": 0.512827313071306, - "grad_norm": 1.8903169631958008, - "learning_rate": 4.1452878115478235e-05, - "loss": 0.7378, - "step": 58010 - }, - { - "epoch": 0.5129157163316184, - "grad_norm": 1.5866371393203735, - "learning_rate": 4.1451404727806363e-05, - "loss": 0.8031, - "step": 58020 - }, - { - "epoch": 0.5130041195919306, - "grad_norm": 6.577564239501953, - "learning_rate": 4.144993134013449e-05, - "loss": 0.7508, - "step": 58030 - }, - { - "epoch": 0.5130925228522428, - "grad_norm": 4.205037593841553, - "learning_rate": 4.144845795246262e-05, - "loss": 0.7304, - "step": 58040 - }, - { - "epoch": 0.5131809261125551, - "grad_norm": 6.128932476043701, - "learning_rate": 4.1446984564790755e-05, - "loss": 0.6685, - "step": 58050 - }, - { - "epoch": 0.5132693293728673, - "grad_norm": 7.909671306610107, - "learning_rate": 4.144551117711888e-05, - "loss": 0.6849, - "step": 58060 - }, - { - "epoch": 0.5133577326331795, - "grad_norm": 2.2326576709747314, - "learning_rate": 4.144403778944701e-05, - "loss": 0.6745, - "step": 58070 - }, - { - "epoch": 0.5134461358934918, - "grad_norm": 2.6070303916931152, - "learning_rate": 4.144256440177514e-05, - "loss": 0.5746, - "step": 58080 - }, - { - "epoch": 0.513534539153804, - "grad_norm": 4.542726993560791, - "learning_rate": 4.144109101410327e-05, - "loss": 0.683, - "step": 58090 - }, - { - "epoch": 0.5136229424141162, - "grad_norm": 1.4204988479614258, - "learning_rate": 4.14396176264314e-05, - "loss": 0.6697, - "step": 58100 - }, - { - "epoch": 0.5137113456744284, - "grad_norm": 2.0310754776000977, - "learning_rate": 4.143814423875953e-05, - "loss": 0.734, - "step": 58110 - }, - { - "epoch": 0.5137997489347407, - "grad_norm": 3.9325571060180664, - "learning_rate": 4.1436670851087654e-05, - "loss": 0.7326, - "step": 58120 - }, - { - "epoch": 0.5138881521950529, - "grad_norm": 4.3330206871032715, - "learning_rate": 4.143519746341579e-05, - "loss": 0.8025, - "step": 58130 - }, - { - "epoch": 0.5139765554553652, - "grad_norm": 4.2222514152526855, - "learning_rate": 4.143372407574391e-05, - "loss": 0.7605, - "step": 58140 - }, - { - "epoch": 0.5140649587156775, - "grad_norm": 0.9864398837089539, - "learning_rate": 4.1432250688072046e-05, - "loss": 0.6808, - "step": 58150 - }, - { - "epoch": 0.5141533619759897, - "grad_norm": 3.626385450363159, - "learning_rate": 4.1430777300400174e-05, - "loss": 0.6332, - "step": 58160 - }, - { - "epoch": 0.5142417652363019, - "grad_norm": 1.2500076293945312, - "learning_rate": 4.14293039127283e-05, - "loss": 0.6963, - "step": 58170 - }, - { - "epoch": 0.5143301684966142, - "grad_norm": 9.736292839050293, - "learning_rate": 4.142783052505643e-05, - "loss": 0.6112, - "step": 58180 - }, - { - "epoch": 0.5144185717569264, - "grad_norm": 3.2459876537323, - "learning_rate": 4.1426357137384566e-05, - "loss": 0.7272, - "step": 58190 - }, - { - "epoch": 0.5145069750172386, - "grad_norm": 1.8464261293411255, - "learning_rate": 4.142488374971269e-05, - "loss": 0.6929, - "step": 58200 - }, - { - "epoch": 0.5145953782775509, - "grad_norm": 2.659166097640991, - "learning_rate": 4.142341036204082e-05, - "loss": 0.6395, - "step": 58210 - }, - { - "epoch": 0.5146837815378631, - "grad_norm": 3.726670503616333, - "learning_rate": 4.142193697436895e-05, - "loss": 0.7603, - "step": 58220 - }, - { - "epoch": 0.5147721847981753, - "grad_norm": 7.228145122528076, - "learning_rate": 4.142046358669708e-05, - "loss": 0.7084, - "step": 58230 - }, - { - "epoch": 0.5148605880584876, - "grad_norm": 2.7289206981658936, - "learning_rate": 4.141899019902521e-05, - "loss": 0.8021, - "step": 58240 - }, - { - "epoch": 0.5149489913187998, - "grad_norm": 4.535951614379883, - "learning_rate": 4.141751681135334e-05, - "loss": 0.7113, - "step": 58250 - }, - { - "epoch": 0.5150373945791121, - "grad_norm": 7.421276569366455, - "learning_rate": 4.1416043423681464e-05, - "loss": 0.7187, - "step": 58260 - }, - { - "epoch": 0.5151257978394244, - "grad_norm": 11.996021270751953, - "learning_rate": 4.14145700360096e-05, - "loss": 0.7444, - "step": 58270 - }, - { - "epoch": 0.5152142010997366, - "grad_norm": 4.288655757904053, - "learning_rate": 4.141309664833772e-05, - "loss": 0.5822, - "step": 58280 - }, - { - "epoch": 0.5153026043600488, - "grad_norm": 6.721730709075928, - "learning_rate": 4.1411623260665856e-05, - "loss": 0.7321, - "step": 58290 - }, - { - "epoch": 0.515391007620361, - "grad_norm": 8.89136028289795, - "learning_rate": 4.1410149872993984e-05, - "loss": 0.6965, - "step": 58300 - }, - { - "epoch": 0.5154794108806733, - "grad_norm": 9.430233001708984, - "learning_rate": 4.140867648532211e-05, - "loss": 0.6737, - "step": 58310 - }, - { - "epoch": 0.5155678141409855, - "grad_norm": 4.176496505737305, - "learning_rate": 4.140720309765024e-05, - "loss": 0.6452, - "step": 58320 - }, - { - "epoch": 0.5156562174012977, - "grad_norm": 4.162666320800781, - "learning_rate": 4.1405729709978376e-05, - "loss": 0.6797, - "step": 58330 - }, - { - "epoch": 0.51574462066161, - "grad_norm": 7.322152137756348, - "learning_rate": 4.14042563223065e-05, - "loss": 0.7263, - "step": 58340 - }, - { - "epoch": 0.5158330239219222, - "grad_norm": 3.2498905658721924, - "learning_rate": 4.140278293463463e-05, - "loss": 0.7062, - "step": 58350 - }, - { - "epoch": 0.5159214271822344, - "grad_norm": 1.6769343614578247, - "learning_rate": 4.140130954696276e-05, - "loss": 0.556, - "step": 58360 - }, - { - "epoch": 0.5160098304425467, - "grad_norm": 4.269484996795654, - "learning_rate": 4.139983615929089e-05, - "loss": 0.653, - "step": 58370 - }, - { - "epoch": 0.516098233702859, - "grad_norm": 4.248597145080566, - "learning_rate": 4.139836277161902e-05, - "loss": 0.6451, - "step": 58380 - }, - { - "epoch": 0.5161866369631712, - "grad_norm": 3.693969249725342, - "learning_rate": 4.1396889383947146e-05, - "loss": 0.7437, - "step": 58390 - }, - { - "epoch": 0.5162750402234835, - "grad_norm": 2.039949893951416, - "learning_rate": 4.1395415996275275e-05, - "loss": 0.5287, - "step": 58400 - }, - { - "epoch": 0.5163634434837957, - "grad_norm": 5.918385028839111, - "learning_rate": 4.139394260860341e-05, - "loss": 0.8401, - "step": 58410 - }, - { - "epoch": 0.5164518467441079, - "grad_norm": 0.8016469478607178, - "learning_rate": 4.139246922093153e-05, - "loss": 0.7515, - "step": 58420 - }, - { - "epoch": 0.5165402500044202, - "grad_norm": 3.3638181686401367, - "learning_rate": 4.139099583325967e-05, - "loss": 0.6435, - "step": 58430 - }, - { - "epoch": 0.5166286532647324, - "grad_norm": 2.5616002082824707, - "learning_rate": 4.1389522445587795e-05, - "loss": 0.8031, - "step": 58440 - }, - { - "epoch": 0.5167170565250446, - "grad_norm": 6.8796491622924805, - "learning_rate": 4.138804905791592e-05, - "loss": 0.672, - "step": 58450 - }, - { - "epoch": 0.5168054597853569, - "grad_norm": 4.082709789276123, - "learning_rate": 4.138657567024405e-05, - "loss": 0.7497, - "step": 58460 - }, - { - "epoch": 0.5168938630456691, - "grad_norm": 7.032834529876709, - "learning_rate": 4.138510228257219e-05, - "loss": 0.6995, - "step": 58470 - }, - { - "epoch": 0.5169822663059813, - "grad_norm": 3.5431320667266846, - "learning_rate": 4.138362889490031e-05, - "loss": 0.8405, - "step": 58480 - }, - { - "epoch": 0.5170706695662936, - "grad_norm": 3.9321630001068115, - "learning_rate": 4.1382155507228444e-05, - "loss": 0.7096, - "step": 58490 - }, - { - "epoch": 0.5171590728266059, - "grad_norm": 3.8108572959899902, - "learning_rate": 4.138068211955657e-05, - "loss": 0.7797, - "step": 58500 - }, - { - "epoch": 0.5172474760869181, - "grad_norm": 3.7932891845703125, - "learning_rate": 4.13792087318847e-05, - "loss": 0.7674, - "step": 58510 - }, - { - "epoch": 0.5173358793472304, - "grad_norm": 5.418814659118652, - "learning_rate": 4.137773534421283e-05, - "loss": 0.6954, - "step": 58520 - }, - { - "epoch": 0.5174242826075426, - "grad_norm": 2.8099708557128906, - "learning_rate": 4.137626195654096e-05, - "loss": 0.6776, - "step": 58530 - }, - { - "epoch": 0.5175126858678548, - "grad_norm": 2.9042539596557617, - "learning_rate": 4.1374788568869085e-05, - "loss": 0.6521, - "step": 58540 - }, - { - "epoch": 0.517601089128167, - "grad_norm": 8.134185791015625, - "learning_rate": 4.137331518119722e-05, - "loss": 0.7712, - "step": 58550 - }, - { - "epoch": 0.5176894923884793, - "grad_norm": 4.098917484283447, - "learning_rate": 4.137184179352535e-05, - "loss": 0.6245, - "step": 58560 - }, - { - "epoch": 0.5177778956487915, - "grad_norm": 6.311615943908691, - "learning_rate": 4.137036840585348e-05, - "loss": 0.888, - "step": 58570 - }, - { - "epoch": 0.5178662989091037, - "grad_norm": 0.7927438020706177, - "learning_rate": 4.1368895018181606e-05, - "loss": 0.662, - "step": 58580 - }, - { - "epoch": 0.517954702169416, - "grad_norm": 5.305534839630127, - "learning_rate": 4.1367421630509734e-05, - "loss": 0.6315, - "step": 58590 - }, - { - "epoch": 0.5180431054297282, - "grad_norm": 2.211588144302368, - "learning_rate": 4.136594824283786e-05, - "loss": 0.6306, - "step": 58600 - }, - { - "epoch": 0.5181315086900405, - "grad_norm": 3.837446928024292, - "learning_rate": 4.136447485516599e-05, - "loss": 0.6584, - "step": 58610 - }, - { - "epoch": 0.5182199119503528, - "grad_norm": 3.016932487487793, - "learning_rate": 4.1363001467494126e-05, - "loss": 0.7476, - "step": 58620 - }, - { - "epoch": 0.518308315210665, - "grad_norm": 2.5306055545806885, - "learning_rate": 4.1361528079822254e-05, - "loss": 0.774, - "step": 58630 - }, - { - "epoch": 0.5183967184709772, - "grad_norm": 12.529138565063477, - "learning_rate": 4.136005469215038e-05, - "loss": 0.6646, - "step": 58640 - }, - { - "epoch": 0.5184851217312895, - "grad_norm": 2.591942071914673, - "learning_rate": 4.135858130447851e-05, - "loss": 0.7123, - "step": 58650 - }, - { - "epoch": 0.5185735249916017, - "grad_norm": 9.732890129089355, - "learning_rate": 4.135710791680664e-05, - "loss": 0.7201, - "step": 58660 - }, - { - "epoch": 0.5186619282519139, - "grad_norm": 43.34449768066406, - "learning_rate": 4.135563452913477e-05, - "loss": 0.6637, - "step": 58670 - }, - { - "epoch": 0.5187503315122262, - "grad_norm": 2.275779962539673, - "learning_rate": 4.13541611414629e-05, - "loss": 0.6233, - "step": 58680 - }, - { - "epoch": 0.5188387347725384, - "grad_norm": 6.794007301330566, - "learning_rate": 4.135268775379103e-05, - "loss": 0.7058, - "step": 58690 - }, - { - "epoch": 0.5189271380328506, - "grad_norm": 6.4937052726745605, - "learning_rate": 4.135121436611916e-05, - "loss": 0.7063, - "step": 58700 - }, - { - "epoch": 0.5190155412931629, - "grad_norm": 4.3119072914123535, - "learning_rate": 4.134974097844729e-05, - "loss": 0.7617, - "step": 58710 - }, - { - "epoch": 0.5191039445534751, - "grad_norm": 2.4573779106140137, - "learning_rate": 4.1348267590775416e-05, - "loss": 0.7211, - "step": 58720 - }, - { - "epoch": 0.5191923478137874, - "grad_norm": 1.5036373138427734, - "learning_rate": 4.1346794203103544e-05, - "loss": 0.6407, - "step": 58730 - }, - { - "epoch": 0.5192807510740997, - "grad_norm": 5.157151222229004, - "learning_rate": 4.134532081543168e-05, - "loss": 0.7112, - "step": 58740 - }, - { - "epoch": 0.5193691543344119, - "grad_norm": 4.024260997772217, - "learning_rate": 4.13438474277598e-05, - "loss": 0.5759, - "step": 58750 - }, - { - "epoch": 0.5194575575947241, - "grad_norm": 6.83284330368042, - "learning_rate": 4.1342374040087936e-05, - "loss": 0.598, - "step": 58760 - }, - { - "epoch": 0.5195459608550363, - "grad_norm": 2.6124684810638428, - "learning_rate": 4.1340900652416065e-05, - "loss": 0.7998, - "step": 58770 - }, - { - "epoch": 0.5196343641153486, - "grad_norm": 11.75877571105957, - "learning_rate": 4.133942726474419e-05, - "loss": 0.7857, - "step": 58780 - }, - { - "epoch": 0.5197227673756608, - "grad_norm": 5.469943046569824, - "learning_rate": 4.133795387707232e-05, - "loss": 0.6067, - "step": 58790 - }, - { - "epoch": 0.519811170635973, - "grad_norm": 8.962861061096191, - "learning_rate": 4.1336480489400456e-05, - "loss": 0.5748, - "step": 58800 - }, - { - "epoch": 0.5198995738962853, - "grad_norm": 5.187961578369141, - "learning_rate": 4.133500710172858e-05, - "loss": 0.7629, - "step": 58810 - }, - { - "epoch": 0.5199879771565975, - "grad_norm": 10.618610382080078, - "learning_rate": 4.133353371405671e-05, - "loss": 0.8602, - "step": 58820 - }, - { - "epoch": 0.5200763804169097, - "grad_norm": 4.766876220703125, - "learning_rate": 4.133206032638484e-05, - "loss": 0.6604, - "step": 58830 - }, - { - "epoch": 0.520164783677222, - "grad_norm": 2.7952160835266113, - "learning_rate": 4.133058693871297e-05, - "loss": 0.6609, - "step": 58840 - }, - { - "epoch": 0.5202531869375343, - "grad_norm": 1.3467514514923096, - "learning_rate": 4.13291135510411e-05, - "loss": 0.6487, - "step": 58850 - }, - { - "epoch": 0.5203415901978465, - "grad_norm": 12.943243980407715, - "learning_rate": 4.1327640163369227e-05, - "loss": 0.5737, - "step": 58860 - }, - { - "epoch": 0.5204299934581588, - "grad_norm": 10.962312698364258, - "learning_rate": 4.1326166775697355e-05, - "loss": 0.7685, - "step": 58870 - }, - { - "epoch": 0.520518396718471, - "grad_norm": 2.4418134689331055, - "learning_rate": 4.132469338802549e-05, - "loss": 0.698, - "step": 58880 - }, - { - "epoch": 0.5206067999787832, - "grad_norm": 4.1538238525390625, - "learning_rate": 4.132322000035361e-05, - "loss": 0.7381, - "step": 58890 - }, - { - "epoch": 0.5206952032390955, - "grad_norm": 5.691303253173828, - "learning_rate": 4.132174661268175e-05, - "loss": 0.6875, - "step": 58900 - }, - { - "epoch": 0.5207836064994077, - "grad_norm": 3.1926708221435547, - "learning_rate": 4.1320273225009875e-05, - "loss": 0.6794, - "step": 58910 - }, - { - "epoch": 0.5208720097597199, - "grad_norm": 8.986908912658691, - "learning_rate": 4.1318799837338003e-05, - "loss": 0.6556, - "step": 58920 - }, - { - "epoch": 0.5209604130200322, - "grad_norm": 7.45997953414917, - "learning_rate": 4.131732644966613e-05, - "loss": 0.6706, - "step": 58930 - }, - { - "epoch": 0.5210488162803444, - "grad_norm": 3.960982322692871, - "learning_rate": 4.131585306199427e-05, - "loss": 0.7887, - "step": 58940 - }, - { - "epoch": 0.5211372195406566, - "grad_norm": 2.496504068374634, - "learning_rate": 4.131437967432239e-05, - "loss": 0.7178, - "step": 58950 - }, - { - "epoch": 0.5212256228009688, - "grad_norm": 1.9526597261428833, - "learning_rate": 4.1312906286650524e-05, - "loss": 0.5851, - "step": 58960 - }, - { - "epoch": 0.5213140260612812, - "grad_norm": 9.463976860046387, - "learning_rate": 4.1311432898978645e-05, - "loss": 0.7451, - "step": 58970 - }, - { - "epoch": 0.5214024293215934, - "grad_norm": 7.764998912811279, - "learning_rate": 4.130995951130678e-05, - "loss": 0.8033, - "step": 58980 - }, - { - "epoch": 0.5214908325819057, - "grad_norm": 1.8823398351669312, - "learning_rate": 4.130848612363491e-05, - "loss": 0.5242, - "step": 58990 - }, - { - "epoch": 0.5215792358422179, - "grad_norm": 2.699099063873291, - "learning_rate": 4.130701273596304e-05, - "loss": 0.7504, - "step": 59000 - }, - { - "epoch": 0.5216676391025301, - "grad_norm": 1.5988144874572754, - "learning_rate": 4.1305539348291165e-05, - "loss": 0.6925, - "step": 59010 - }, - { - "epoch": 0.5217560423628423, - "grad_norm": 2.170100688934326, - "learning_rate": 4.13040659606193e-05, - "loss": 0.7847, - "step": 59020 - }, - { - "epoch": 0.5218444456231546, - "grad_norm": 4.162135601043701, - "learning_rate": 4.130259257294742e-05, - "loss": 0.6309, - "step": 59030 - }, - { - "epoch": 0.5219328488834668, - "grad_norm": 7.490842819213867, - "learning_rate": 4.130111918527556e-05, - "loss": 0.6931, - "step": 59040 - }, - { - "epoch": 0.522021252143779, - "grad_norm": 1.2390353679656982, - "learning_rate": 4.1299645797603686e-05, - "loss": 0.7108, - "step": 59050 - }, - { - "epoch": 0.5221096554040913, - "grad_norm": 1.089184045791626, - "learning_rate": 4.1298172409931814e-05, - "loss": 0.6427, - "step": 59060 - }, - { - "epoch": 0.5221980586644035, - "grad_norm": 4.4747819900512695, - "learning_rate": 4.129669902225994e-05, - "loss": 0.8834, - "step": 59070 - }, - { - "epoch": 0.5222864619247157, - "grad_norm": 1.8252582550048828, - "learning_rate": 4.129522563458808e-05, - "loss": 0.7283, - "step": 59080 - }, - { - "epoch": 0.5223748651850281, - "grad_norm": 3.516965389251709, - "learning_rate": 4.12937522469162e-05, - "loss": 0.7267, - "step": 59090 - }, - { - "epoch": 0.5224632684453403, - "grad_norm": 8.140121459960938, - "learning_rate": 4.1292278859244334e-05, - "loss": 0.7086, - "step": 59100 - }, - { - "epoch": 0.5225516717056525, - "grad_norm": 1.565888524055481, - "learning_rate": 4.1290805471572456e-05, - "loss": 0.6313, - "step": 59110 - }, - { - "epoch": 0.5226400749659648, - "grad_norm": 7.233582973480225, - "learning_rate": 4.128933208390059e-05, - "loss": 0.7085, - "step": 59120 - }, - { - "epoch": 0.522728478226277, - "grad_norm": 2.30755877494812, - "learning_rate": 4.128785869622872e-05, - "loss": 0.6304, - "step": 59130 - }, - { - "epoch": 0.5228168814865892, - "grad_norm": 1.746673345565796, - "learning_rate": 4.128638530855685e-05, - "loss": 0.711, - "step": 59140 - }, - { - "epoch": 0.5229052847469015, - "grad_norm": 6.198861598968506, - "learning_rate": 4.1284911920884976e-05, - "loss": 0.7649, - "step": 59150 - }, - { - "epoch": 0.5229936880072137, - "grad_norm": 5.0859150886535645, - "learning_rate": 4.128343853321311e-05, - "loss": 0.8103, - "step": 59160 - }, - { - "epoch": 0.5230820912675259, - "grad_norm": 7.284816265106201, - "learning_rate": 4.128196514554123e-05, - "loss": 0.6774, - "step": 59170 - }, - { - "epoch": 0.5231704945278381, - "grad_norm": 10.868987083435059, - "learning_rate": 4.128049175786937e-05, - "loss": 0.6578, - "step": 59180 - }, - { - "epoch": 0.5232588977881504, - "grad_norm": 4.1673431396484375, - "learning_rate": 4.1279018370197496e-05, - "loss": 0.7689, - "step": 59190 - }, - { - "epoch": 0.5233473010484627, - "grad_norm": 7.5632147789001465, - "learning_rate": 4.1277544982525624e-05, - "loss": 0.6873, - "step": 59200 - }, - { - "epoch": 0.523435704308775, - "grad_norm": 5.527274131774902, - "learning_rate": 4.127607159485375e-05, - "loss": 0.629, - "step": 59210 - }, - { - "epoch": 0.5235241075690872, - "grad_norm": 2.827582359313965, - "learning_rate": 4.127459820718188e-05, - "loss": 0.7302, - "step": 59220 - }, - { - "epoch": 0.5236125108293994, - "grad_norm": 0.5756442546844482, - "learning_rate": 4.127312481951001e-05, - "loss": 0.6671, - "step": 59230 - }, - { - "epoch": 0.5237009140897116, - "grad_norm": 11.035965919494629, - "learning_rate": 4.1271651431838145e-05, - "loss": 0.611, - "step": 59240 - }, - { - "epoch": 0.5237893173500239, - "grad_norm": 9.307296752929688, - "learning_rate": 4.1270178044166266e-05, - "loss": 0.6444, - "step": 59250 - }, - { - "epoch": 0.5238777206103361, - "grad_norm": 2.356525182723999, - "learning_rate": 4.12687046564944e-05, - "loss": 0.6942, - "step": 59260 - }, - { - "epoch": 0.5239661238706483, - "grad_norm": 3.160883665084839, - "learning_rate": 4.126723126882253e-05, - "loss": 0.6771, - "step": 59270 - }, - { - "epoch": 0.5240545271309606, - "grad_norm": 6.4535112380981445, - "learning_rate": 4.126575788115066e-05, - "loss": 0.7218, - "step": 59280 - }, - { - "epoch": 0.5241429303912728, - "grad_norm": 9.797268867492676, - "learning_rate": 4.1264284493478786e-05, - "loss": 0.8082, - "step": 59290 - }, - { - "epoch": 0.524231333651585, - "grad_norm": 3.2987751960754395, - "learning_rate": 4.126281110580692e-05, - "loss": 0.6652, - "step": 59300 - }, - { - "epoch": 0.5243197369118973, - "grad_norm": 1.5817397832870483, - "learning_rate": 4.126133771813504e-05, - "loss": 0.7169, - "step": 59310 - }, - { - "epoch": 0.5244081401722096, - "grad_norm": 4.6767754554748535, - "learning_rate": 4.125986433046318e-05, - "loss": 0.8169, - "step": 59320 - }, - { - "epoch": 0.5244965434325218, - "grad_norm": 8.094579696655273, - "learning_rate": 4.12583909427913e-05, - "loss": 0.6755, - "step": 59330 - }, - { - "epoch": 0.5245849466928341, - "grad_norm": 4.5218682289123535, - "learning_rate": 4.1256917555119435e-05, - "loss": 0.6427, - "step": 59340 - }, - { - "epoch": 0.5246733499531463, - "grad_norm": 3.401343822479248, - "learning_rate": 4.125544416744756e-05, - "loss": 0.6054, - "step": 59350 - }, - { - "epoch": 0.5247617532134585, - "grad_norm": 3.321723222732544, - "learning_rate": 4.125397077977569e-05, - "loss": 0.8062, - "step": 59360 - }, - { - "epoch": 0.5248501564737708, - "grad_norm": 10.014087677001953, - "learning_rate": 4.125249739210382e-05, - "loss": 0.6035, - "step": 59370 - }, - { - "epoch": 0.524938559734083, - "grad_norm": 1.1051753759384155, - "learning_rate": 4.1251024004431955e-05, - "loss": 0.6847, - "step": 59380 - }, - { - "epoch": 0.5250269629943952, - "grad_norm": 6.683419704437256, - "learning_rate": 4.124955061676008e-05, - "loss": 0.6571, - "step": 59390 - }, - { - "epoch": 0.5251153662547074, - "grad_norm": 4.3637261390686035, - "learning_rate": 4.124807722908821e-05, - "loss": 0.872, - "step": 59400 - }, - { - "epoch": 0.5252037695150197, - "grad_norm": 5.4338579177856445, - "learning_rate": 4.124660384141634e-05, - "loss": 0.693, - "step": 59410 - }, - { - "epoch": 0.5252921727753319, - "grad_norm": 4.866839408874512, - "learning_rate": 4.124513045374447e-05, - "loss": 0.7539, - "step": 59420 - }, - { - "epoch": 0.5253805760356441, - "grad_norm": 2.122809410095215, - "learning_rate": 4.12436570660726e-05, - "loss": 0.752, - "step": 59430 - }, - { - "epoch": 0.5254689792959565, - "grad_norm": 6.219788551330566, - "learning_rate": 4.1242183678400725e-05, - "loss": 0.6567, - "step": 59440 - }, - { - "epoch": 0.5255573825562687, - "grad_norm": 3.0866973400115967, - "learning_rate": 4.1240710290728854e-05, - "loss": 0.6744, - "step": 59450 - }, - { - "epoch": 0.525645785816581, - "grad_norm": 1.6338080167770386, - "learning_rate": 4.123923690305699e-05, - "loss": 0.6373, - "step": 59460 - }, - { - "epoch": 0.5257341890768932, - "grad_norm": 13.409435272216797, - "learning_rate": 4.123776351538512e-05, - "loss": 0.6503, - "step": 59470 - }, - { - "epoch": 0.5258225923372054, - "grad_norm": 10.768632888793945, - "learning_rate": 4.1236290127713245e-05, - "loss": 0.7392, - "step": 59480 - }, - { - "epoch": 0.5259109955975176, - "grad_norm": 2.5887343883514404, - "learning_rate": 4.1234816740041374e-05, - "loss": 0.7239, - "step": 59490 - }, - { - "epoch": 0.5259993988578299, - "grad_norm": 7.033324241638184, - "learning_rate": 4.12333433523695e-05, - "loss": 0.7645, - "step": 59500 - }, - { - "epoch": 0.5260878021181421, - "grad_norm": 16.299779891967773, - "learning_rate": 4.123186996469763e-05, - "loss": 0.6919, - "step": 59510 - }, - { - "epoch": 0.5261762053784543, - "grad_norm": 5.943434238433838, - "learning_rate": 4.1230396577025766e-05, - "loss": 0.5952, - "step": 59520 - }, - { - "epoch": 0.5262646086387666, - "grad_norm": 4.512763023376465, - "learning_rate": 4.1228923189353894e-05, - "loss": 0.6243, - "step": 59530 - }, - { - "epoch": 0.5263530118990788, - "grad_norm": 4.6850433349609375, - "learning_rate": 4.122744980168202e-05, - "loss": 0.7236, - "step": 59540 - }, - { - "epoch": 0.526441415159391, - "grad_norm": 1.1690351963043213, - "learning_rate": 4.122597641401015e-05, - "loss": 0.7844, - "step": 59550 - }, - { - "epoch": 0.5265298184197034, - "grad_norm": 3.369765281677246, - "learning_rate": 4.122450302633828e-05, - "loss": 0.6246, - "step": 59560 - }, - { - "epoch": 0.5266182216800156, - "grad_norm": 3.848281145095825, - "learning_rate": 4.122302963866641e-05, - "loss": 0.658, - "step": 59570 - }, - { - "epoch": 0.5267066249403278, - "grad_norm": 4.382859706878662, - "learning_rate": 4.1221556250994536e-05, - "loss": 0.6938, - "step": 59580 - }, - { - "epoch": 0.5267950282006401, - "grad_norm": 5.016404628753662, - "learning_rate": 4.122008286332267e-05, - "loss": 0.75, - "step": 59590 - }, - { - "epoch": 0.5268834314609523, - "grad_norm": 4.435308933258057, - "learning_rate": 4.12186094756508e-05, - "loss": 0.7533, - "step": 59600 - }, - { - "epoch": 0.5269718347212645, - "grad_norm": 1.7211618423461914, - "learning_rate": 4.121713608797893e-05, - "loss": 0.7225, - "step": 59610 - }, - { - "epoch": 0.5270602379815768, - "grad_norm": 3.0837252140045166, - "learning_rate": 4.1215662700307056e-05, - "loss": 0.8015, - "step": 59620 - }, - { - "epoch": 0.527148641241889, - "grad_norm": 1.7684383392333984, - "learning_rate": 4.1214189312635184e-05, - "loss": 0.6607, - "step": 59630 - }, - { - "epoch": 0.5272370445022012, - "grad_norm": 1.327669620513916, - "learning_rate": 4.121271592496331e-05, - "loss": 0.7018, - "step": 59640 - }, - { - "epoch": 0.5273254477625134, - "grad_norm": 2.8634445667266846, - "learning_rate": 4.121124253729145e-05, - "loss": 0.6262, - "step": 59650 - }, - { - "epoch": 0.5274138510228257, - "grad_norm": 5.957954406738281, - "learning_rate": 4.1209769149619576e-05, - "loss": 0.527, - "step": 59660 - }, - { - "epoch": 0.527502254283138, - "grad_norm": 5.212845802307129, - "learning_rate": 4.1208295761947705e-05, - "loss": 0.7562, - "step": 59670 - }, - { - "epoch": 0.5275906575434502, - "grad_norm": 2.8976666927337646, - "learning_rate": 4.120682237427583e-05, - "loss": 0.5941, - "step": 59680 - }, - { - "epoch": 0.5276790608037625, - "grad_norm": 1.9253665208816528, - "learning_rate": 4.120534898660396e-05, - "loss": 0.5756, - "step": 59690 - }, - { - "epoch": 0.5277674640640747, - "grad_norm": 3.331376552581787, - "learning_rate": 4.120387559893209e-05, - "loss": 0.5925, - "step": 59700 - }, - { - "epoch": 0.5278558673243869, - "grad_norm": 7.005678176879883, - "learning_rate": 4.1202402211260225e-05, - "loss": 0.6466, - "step": 59710 - }, - { - "epoch": 0.5279442705846992, - "grad_norm": 5.357455730438232, - "learning_rate": 4.1200928823588346e-05, - "loss": 0.5732, - "step": 59720 - }, - { - "epoch": 0.5280326738450114, - "grad_norm": 1.7362685203552246, - "learning_rate": 4.119945543591648e-05, - "loss": 0.6216, - "step": 59730 - }, - { - "epoch": 0.5281210771053236, - "grad_norm": 2.231977939605713, - "learning_rate": 4.119798204824461e-05, - "loss": 0.6929, - "step": 59740 - }, - { - "epoch": 0.5282094803656359, - "grad_norm": 1.708698034286499, - "learning_rate": 4.119650866057274e-05, - "loss": 0.7208, - "step": 59750 - }, - { - "epoch": 0.5282978836259481, - "grad_norm": 5.46640157699585, - "learning_rate": 4.1195035272900866e-05, - "loss": 0.7098, - "step": 59760 - }, - { - "epoch": 0.5283862868862603, - "grad_norm": 7.511179447174072, - "learning_rate": 4.1193561885229e-05, - "loss": 0.6752, - "step": 59770 - }, - { - "epoch": 0.5284746901465726, - "grad_norm": 4.109575271606445, - "learning_rate": 4.119208849755712e-05, - "loss": 0.6387, - "step": 59780 - }, - { - "epoch": 0.5285630934068849, - "grad_norm": 14.311249732971191, - "learning_rate": 4.119061510988526e-05, - "loss": 0.7875, - "step": 59790 - }, - { - "epoch": 0.5286514966671971, - "grad_norm": 1.998884677886963, - "learning_rate": 4.118914172221338e-05, - "loss": 0.6728, - "step": 59800 - }, - { - "epoch": 0.5287398999275094, - "grad_norm": 3.9691755771636963, - "learning_rate": 4.1187668334541515e-05, - "loss": 0.6487, - "step": 59810 - }, - { - "epoch": 0.5288283031878216, - "grad_norm": 14.432732582092285, - "learning_rate": 4.118619494686964e-05, - "loss": 0.7015, - "step": 59820 - }, - { - "epoch": 0.5289167064481338, - "grad_norm": 8.749857902526855, - "learning_rate": 4.118472155919777e-05, - "loss": 0.756, - "step": 59830 - }, - { - "epoch": 0.529005109708446, - "grad_norm": 3.3115224838256836, - "learning_rate": 4.11832481715259e-05, - "loss": 0.7646, - "step": 59840 - }, - { - "epoch": 0.5290935129687583, - "grad_norm": 5.961042881011963, - "learning_rate": 4.1181774783854035e-05, - "loss": 0.6872, - "step": 59850 - }, - { - "epoch": 0.5291819162290705, - "grad_norm": 3.7259011268615723, - "learning_rate": 4.118030139618216e-05, - "loss": 0.6913, - "step": 59860 - }, - { - "epoch": 0.5292703194893827, - "grad_norm": 1.4316565990447998, - "learning_rate": 4.117882800851029e-05, - "loss": 0.8236, - "step": 59870 - }, - { - "epoch": 0.529358722749695, - "grad_norm": 3.7824482917785645, - "learning_rate": 4.117735462083842e-05, - "loss": 0.612, - "step": 59880 - }, - { - "epoch": 0.5294471260100072, - "grad_norm": 6.120148181915283, - "learning_rate": 4.117588123316655e-05, - "loss": 0.7283, - "step": 59890 - }, - { - "epoch": 0.5295355292703194, - "grad_norm": 3.144371271133423, - "learning_rate": 4.117440784549468e-05, - "loss": 0.6866, - "step": 59900 - }, - { - "epoch": 0.5296239325306318, - "grad_norm": 10.815184593200684, - "learning_rate": 4.1172934457822805e-05, - "loss": 0.6136, - "step": 59910 - }, - { - "epoch": 0.529712335790944, - "grad_norm": 5.573663711547852, - "learning_rate": 4.1171461070150934e-05, - "loss": 0.643, - "step": 59920 - }, - { - "epoch": 0.5298007390512562, - "grad_norm": 1.9701824188232422, - "learning_rate": 4.116998768247907e-05, - "loss": 0.7374, - "step": 59930 - }, - { - "epoch": 0.5298891423115685, - "grad_norm": 2.470747232437134, - "learning_rate": 4.116851429480719e-05, - "loss": 0.627, - "step": 59940 - }, - { - "epoch": 0.5299775455718807, - "grad_norm": 2.7850711345672607, - "learning_rate": 4.1167040907135326e-05, - "loss": 0.6567, - "step": 59950 - }, - { - "epoch": 0.5300659488321929, - "grad_norm": 1.6354985237121582, - "learning_rate": 4.1165567519463454e-05, - "loss": 0.6099, - "step": 59960 - }, - { - "epoch": 0.5301543520925052, - "grad_norm": 10.398540496826172, - "learning_rate": 4.116409413179158e-05, - "loss": 0.677, - "step": 59970 - }, - { - "epoch": 0.5302427553528174, - "grad_norm": 3.451448440551758, - "learning_rate": 4.116262074411971e-05, - "loss": 0.6194, - "step": 59980 - }, - { - "epoch": 0.5303311586131296, - "grad_norm": 19.99856948852539, - "learning_rate": 4.1161147356447846e-05, - "loss": 0.7397, - "step": 59990 - }, - { - "epoch": 0.5304195618734419, - "grad_norm": 1.5104225873947144, - "learning_rate": 4.115967396877597e-05, - "loss": 0.6559, - "step": 60000 - }, - { - "epoch": 0.5305079651337541, - "grad_norm": 4.500480651855469, - "learning_rate": 4.11582005811041e-05, - "loss": 0.7364, - "step": 60010 - }, - { - "epoch": 0.5305963683940663, - "grad_norm": 4.798305034637451, - "learning_rate": 4.115672719343223e-05, - "loss": 0.767, - "step": 60020 - }, - { - "epoch": 0.5306847716543787, - "grad_norm": 8.019207954406738, - "learning_rate": 4.115525380576036e-05, - "loss": 0.7647, - "step": 60030 - }, - { - "epoch": 0.5307731749146909, - "grad_norm": 4.901878356933594, - "learning_rate": 4.115378041808849e-05, - "loss": 0.7183, - "step": 60040 - }, - { - "epoch": 0.5308615781750031, - "grad_norm": 3.3756978511810303, - "learning_rate": 4.1152307030416616e-05, - "loss": 0.5972, - "step": 60050 - }, - { - "epoch": 0.5309499814353154, - "grad_norm": 1.3737549781799316, - "learning_rate": 4.1150833642744744e-05, - "loss": 0.6125, - "step": 60060 - }, - { - "epoch": 0.5310383846956276, - "grad_norm": 6.525880813598633, - "learning_rate": 4.114936025507288e-05, - "loss": 0.7338, - "step": 60070 - }, - { - "epoch": 0.5311267879559398, - "grad_norm": 7.533184051513672, - "learning_rate": 4.1147886867401e-05, - "loss": 0.6426, - "step": 60080 - }, - { - "epoch": 0.531215191216252, - "grad_norm": 2.9823904037475586, - "learning_rate": 4.1146413479729136e-05, - "loss": 0.5598, - "step": 60090 - }, - { - "epoch": 0.5313035944765643, - "grad_norm": 1.5166293382644653, - "learning_rate": 4.1144940092057264e-05, - "loss": 0.6468, - "step": 60100 - }, - { - "epoch": 0.5313919977368765, - "grad_norm": 3.1905624866485596, - "learning_rate": 4.114346670438539e-05, - "loss": 0.602, - "step": 60110 - }, - { - "epoch": 0.5314804009971887, - "grad_norm": 1.7269514799118042, - "learning_rate": 4.114199331671352e-05, - "loss": 0.7132, - "step": 60120 - }, - { - "epoch": 0.531568804257501, - "grad_norm": 11.500814437866211, - "learning_rate": 4.1140519929041656e-05, - "loss": 0.6368, - "step": 60130 - }, - { - "epoch": 0.5316572075178132, - "grad_norm": 2.3837459087371826, - "learning_rate": 4.113904654136978e-05, - "loss": 0.6723, - "step": 60140 - }, - { - "epoch": 0.5317456107781255, - "grad_norm": 5.128081321716309, - "learning_rate": 4.113757315369791e-05, - "loss": 0.7357, - "step": 60150 - }, - { - "epoch": 0.5318340140384378, - "grad_norm": 7.816521167755127, - "learning_rate": 4.1136099766026035e-05, - "loss": 0.7343, - "step": 60160 - }, - { - "epoch": 0.53192241729875, - "grad_norm": 6.74944543838501, - "learning_rate": 4.113462637835417e-05, - "loss": 0.7601, - "step": 60170 - }, - { - "epoch": 0.5320108205590622, - "grad_norm": 6.753887176513672, - "learning_rate": 4.11331529906823e-05, - "loss": 0.6861, - "step": 60180 - }, - { - "epoch": 0.5320992238193745, - "grad_norm": 2.1089651584625244, - "learning_rate": 4.1131679603010426e-05, - "loss": 0.5807, - "step": 60190 - }, - { - "epoch": 0.5321876270796867, - "grad_norm": 5.0412445068359375, - "learning_rate": 4.1130206215338555e-05, - "loss": 0.7874, - "step": 60200 - }, - { - "epoch": 0.5322760303399989, - "grad_norm": 3.698671579360962, - "learning_rate": 4.112873282766669e-05, - "loss": 0.5968, - "step": 60210 - }, - { - "epoch": 0.5323644336003112, - "grad_norm": 6.474447727203369, - "learning_rate": 4.112725943999481e-05, - "loss": 0.6826, - "step": 60220 - }, - { - "epoch": 0.5324528368606234, - "grad_norm": 4.3067755699157715, - "learning_rate": 4.1125786052322947e-05, - "loss": 0.6573, - "step": 60230 - }, - { - "epoch": 0.5325412401209356, - "grad_norm": 2.8778164386749268, - "learning_rate": 4.1124312664651075e-05, - "loss": 0.67, - "step": 60240 - }, - { - "epoch": 0.5326296433812479, - "grad_norm": 4.32625150680542, - "learning_rate": 4.11228392769792e-05, - "loss": 0.7479, - "step": 60250 - }, - { - "epoch": 0.5327180466415602, - "grad_norm": 0.9179672598838806, - "learning_rate": 4.112136588930733e-05, - "loss": 0.5865, - "step": 60260 - }, - { - "epoch": 0.5328064499018724, - "grad_norm": 2.5670909881591797, - "learning_rate": 4.111989250163546e-05, - "loss": 0.613, - "step": 60270 - }, - { - "epoch": 0.5328948531621847, - "grad_norm": 5.7348713874816895, - "learning_rate": 4.111841911396359e-05, - "loss": 0.5972, - "step": 60280 - }, - { - "epoch": 0.5329832564224969, - "grad_norm": 1.8548667430877686, - "learning_rate": 4.1116945726291723e-05, - "loss": 0.7069, - "step": 60290 - }, - { - "epoch": 0.5330716596828091, - "grad_norm": 9.18394947052002, - "learning_rate": 4.1115472338619845e-05, - "loss": 0.758, - "step": 60300 - }, - { - "epoch": 0.5331600629431213, - "grad_norm": 8.072030067443848, - "learning_rate": 4.111399895094798e-05, - "loss": 0.6602, - "step": 60310 - }, - { - "epoch": 0.5332484662034336, - "grad_norm": 2.1545324325561523, - "learning_rate": 4.111252556327611e-05, - "loss": 0.6938, - "step": 60320 - }, - { - "epoch": 0.5333368694637458, - "grad_norm": 14.209163665771484, - "learning_rate": 4.111105217560424e-05, - "loss": 0.594, - "step": 60330 - }, - { - "epoch": 0.533425272724058, - "grad_norm": 3.6010241508483887, - "learning_rate": 4.1109578787932365e-05, - "loss": 0.7207, - "step": 60340 - }, - { - "epoch": 0.5335136759843703, - "grad_norm": 4.17183780670166, - "learning_rate": 4.11081054002605e-05, - "loss": 0.7749, - "step": 60350 - }, - { - "epoch": 0.5336020792446825, - "grad_norm": 4.2827887535095215, - "learning_rate": 4.110663201258862e-05, - "loss": 0.5999, - "step": 60360 - }, - { - "epoch": 0.5336904825049947, - "grad_norm": 2.8354761600494385, - "learning_rate": 4.110515862491676e-05, - "loss": 0.7673, - "step": 60370 - }, - { - "epoch": 0.5337788857653071, - "grad_norm": 1.863929033279419, - "learning_rate": 4.1103685237244885e-05, - "loss": 0.6725, - "step": 60380 - }, - { - "epoch": 0.5338672890256193, - "grad_norm": 2.815585136413574, - "learning_rate": 4.1102211849573014e-05, - "loss": 0.6931, - "step": 60390 - }, - { - "epoch": 0.5339556922859315, - "grad_norm": 8.654183387756348, - "learning_rate": 4.110073846190114e-05, - "loss": 0.6496, - "step": 60400 - }, - { - "epoch": 0.5340440955462438, - "grad_norm": 3.421689748764038, - "learning_rate": 4.109926507422927e-05, - "loss": 0.6436, - "step": 60410 - }, - { - "epoch": 0.534132498806556, - "grad_norm": 5.369647026062012, - "learning_rate": 4.10977916865574e-05, - "loss": 0.686, - "step": 60420 - }, - { - "epoch": 0.5342209020668682, - "grad_norm": 2.883557081222534, - "learning_rate": 4.1096318298885534e-05, - "loss": 0.5704, - "step": 60430 - }, - { - "epoch": 0.5343093053271805, - "grad_norm": 4.321925640106201, - "learning_rate": 4.109484491121366e-05, - "loss": 0.776, - "step": 60440 - }, - { - "epoch": 0.5343977085874927, - "grad_norm": 9.737076759338379, - "learning_rate": 4.109337152354179e-05, - "loss": 0.6094, - "step": 60450 - }, - { - "epoch": 0.5344861118478049, - "grad_norm": 2.928046464920044, - "learning_rate": 4.109189813586992e-05, - "loss": 0.6826, - "step": 60460 - }, - { - "epoch": 0.5345745151081172, - "grad_norm": 9.470121383666992, - "learning_rate": 4.109042474819805e-05, - "loss": 0.856, - "step": 60470 - }, - { - "epoch": 0.5346629183684294, - "grad_norm": 4.74775505065918, - "learning_rate": 4.1088951360526176e-05, - "loss": 0.5865, - "step": 60480 - }, - { - "epoch": 0.5347513216287416, - "grad_norm": 4.381826400756836, - "learning_rate": 4.108747797285431e-05, - "loss": 0.7575, - "step": 60490 - }, - { - "epoch": 0.534839724889054, - "grad_norm": 6.804551601409912, - "learning_rate": 4.108600458518244e-05, - "loss": 0.7795, - "step": 60500 - }, - { - "epoch": 0.5349281281493662, - "grad_norm": 1.8940178155899048, - "learning_rate": 4.108453119751057e-05, - "loss": 0.6678, - "step": 60510 - }, - { - "epoch": 0.5350165314096784, - "grad_norm": 2.1449060440063477, - "learning_rate": 4.1083057809838696e-05, - "loss": 0.729, - "step": 60520 - }, - { - "epoch": 0.5351049346699907, - "grad_norm": 3.6761341094970703, - "learning_rate": 4.1081584422166824e-05, - "loss": 0.7945, - "step": 60530 - }, - { - "epoch": 0.5351933379303029, - "grad_norm": 2.263317108154297, - "learning_rate": 4.108011103449495e-05, - "loss": 0.7009, - "step": 60540 - }, - { - "epoch": 0.5352817411906151, - "grad_norm": 3.6665337085723877, - "learning_rate": 4.107863764682308e-05, - "loss": 0.6259, - "step": 60550 - }, - { - "epoch": 0.5353701444509273, - "grad_norm": 1.876232624053955, - "learning_rate": 4.1077164259151216e-05, - "loss": 0.7658, - "step": 60560 - }, - { - "epoch": 0.5354585477112396, - "grad_norm": 3.949385166168213, - "learning_rate": 4.1075690871479344e-05, - "loss": 0.6565, - "step": 60570 - }, - { - "epoch": 0.5355469509715518, - "grad_norm": 2.0023906230926514, - "learning_rate": 4.107421748380747e-05, - "loss": 0.5897, - "step": 60580 - }, - { - "epoch": 0.535635354231864, - "grad_norm": 2.9671292304992676, - "learning_rate": 4.10727440961356e-05, - "loss": 0.6803, - "step": 60590 - }, - { - "epoch": 0.5357237574921763, - "grad_norm": 6.2092604637146, - "learning_rate": 4.107127070846373e-05, - "loss": 0.7583, - "step": 60600 - }, - { - "epoch": 0.5358121607524885, - "grad_norm": 1.2505688667297363, - "learning_rate": 4.106979732079186e-05, - "loss": 0.6547, - "step": 60610 - }, - { - "epoch": 0.5359005640128008, - "grad_norm": 7.783191204071045, - "learning_rate": 4.106832393311999e-05, - "loss": 0.7015, - "step": 60620 - }, - { - "epoch": 0.5359889672731131, - "grad_norm": 9.489867210388184, - "learning_rate": 4.1066850545448115e-05, - "loss": 0.9866, - "step": 60630 - }, - { - "epoch": 0.5360773705334253, - "grad_norm": 2.7540061473846436, - "learning_rate": 4.106537715777625e-05, - "loss": 0.6282, - "step": 60640 - }, - { - "epoch": 0.5361657737937375, - "grad_norm": 5.599301815032959, - "learning_rate": 4.106390377010438e-05, - "loss": 0.6783, - "step": 60650 - }, - { - "epoch": 0.5362541770540498, - "grad_norm": 2.267117738723755, - "learning_rate": 4.1062430382432506e-05, - "loss": 0.8601, - "step": 60660 - }, - { - "epoch": 0.536342580314362, - "grad_norm": 1.5320429801940918, - "learning_rate": 4.1060956994760635e-05, - "loss": 0.5809, - "step": 60670 - }, - { - "epoch": 0.5364309835746742, - "grad_norm": 2.189704656600952, - "learning_rate": 4.105948360708877e-05, - "loss": 0.6703, - "step": 60680 - }, - { - "epoch": 0.5365193868349865, - "grad_norm": 3.8731002807617188, - "learning_rate": 4.105801021941689e-05, - "loss": 0.6857, - "step": 60690 - }, - { - "epoch": 0.5366077900952987, - "grad_norm": 4.347623825073242, - "learning_rate": 4.105653683174503e-05, - "loss": 0.7235, - "step": 60700 - }, - { - "epoch": 0.5366961933556109, - "grad_norm": 4.423962593078613, - "learning_rate": 4.1055063444073155e-05, - "loss": 0.679, - "step": 60710 - }, - { - "epoch": 0.5367845966159231, - "grad_norm": 7.153933525085449, - "learning_rate": 4.105359005640128e-05, - "loss": 0.5527, - "step": 60720 - }, - { - "epoch": 0.5368729998762355, - "grad_norm": 7.4368672370910645, - "learning_rate": 4.105211666872941e-05, - "loss": 0.775, - "step": 60730 - }, - { - "epoch": 0.5369614031365477, - "grad_norm": 4.453962326049805, - "learning_rate": 4.105064328105754e-05, - "loss": 0.7614, - "step": 60740 - }, - { - "epoch": 0.53704980639686, - "grad_norm": 4.159331798553467, - "learning_rate": 4.104916989338567e-05, - "loss": 0.5699, - "step": 60750 - }, - { - "epoch": 0.5371382096571722, - "grad_norm": 4.9161248207092285, - "learning_rate": 4.1047696505713804e-05, - "loss": 0.7272, - "step": 60760 - }, - { - "epoch": 0.5372266129174844, - "grad_norm": 4.475187301635742, - "learning_rate": 4.1046223118041925e-05, - "loss": 0.7538, - "step": 60770 - }, - { - "epoch": 0.5373150161777966, - "grad_norm": 4.353623390197754, - "learning_rate": 4.104474973037006e-05, - "loss": 0.7292, - "step": 60780 - }, - { - "epoch": 0.5374034194381089, - "grad_norm": 9.009878158569336, - "learning_rate": 4.104327634269819e-05, - "loss": 0.6242, - "step": 60790 - }, - { - "epoch": 0.5374918226984211, - "grad_norm": 1.3772538900375366, - "learning_rate": 4.104180295502632e-05, - "loss": 0.6964, - "step": 60800 - }, - { - "epoch": 0.5375802259587333, - "grad_norm": 2.8624658584594727, - "learning_rate": 4.1040329567354445e-05, - "loss": 0.7204, - "step": 60810 - }, - { - "epoch": 0.5376686292190456, - "grad_norm": 3.146087408065796, - "learning_rate": 4.103885617968258e-05, - "loss": 0.6147, - "step": 60820 - }, - { - "epoch": 0.5377570324793578, - "grad_norm": 2.5250422954559326, - "learning_rate": 4.10373827920107e-05, - "loss": 0.7273, - "step": 60830 - }, - { - "epoch": 0.53784543573967, - "grad_norm": 4.4286699295043945, - "learning_rate": 4.103590940433884e-05, - "loss": 0.7399, - "step": 60840 - }, - { - "epoch": 0.5379338389999824, - "grad_norm": 3.8323612213134766, - "learning_rate": 4.103443601666696e-05, - "loss": 0.6672, - "step": 60850 - }, - { - "epoch": 0.5380222422602946, - "grad_norm": 1.406678557395935, - "learning_rate": 4.1032962628995094e-05, - "loss": 0.6859, - "step": 60860 - }, - { - "epoch": 0.5381106455206068, - "grad_norm": 3.1148641109466553, - "learning_rate": 4.103148924132322e-05, - "loss": 0.7467, - "step": 60870 - }, - { - "epoch": 0.5381990487809191, - "grad_norm": 5.790852069854736, - "learning_rate": 4.103001585365135e-05, - "loss": 0.6747, - "step": 60880 - }, - { - "epoch": 0.5382874520412313, - "grad_norm": 3.980956554412842, - "learning_rate": 4.102854246597948e-05, - "loss": 0.7802, - "step": 60890 - }, - { - "epoch": 0.5383758553015435, - "grad_norm": 5.957650184631348, - "learning_rate": 4.1027069078307614e-05, - "loss": 0.8262, - "step": 60900 - }, - { - "epoch": 0.5384642585618558, - "grad_norm": 2.129995822906494, - "learning_rate": 4.1025595690635736e-05, - "loss": 0.5558, - "step": 60910 - }, - { - "epoch": 0.538552661822168, - "grad_norm": 3.5420641899108887, - "learning_rate": 4.102412230296387e-05, - "loss": 0.8433, - "step": 60920 - }, - { - "epoch": 0.5386410650824802, - "grad_norm": 2.2441043853759766, - "learning_rate": 4.1022648915292e-05, - "loss": 0.8058, - "step": 60930 - }, - { - "epoch": 0.5387294683427925, - "grad_norm": 1.1636404991149902, - "learning_rate": 4.102117552762013e-05, - "loss": 0.6055, - "step": 60940 - }, - { - "epoch": 0.5388178716031047, - "grad_norm": 1.2521240711212158, - "learning_rate": 4.1019702139948256e-05, - "loss": 0.7209, - "step": 60950 - }, - { - "epoch": 0.5389062748634169, - "grad_norm": 5.205761432647705, - "learning_rate": 4.101822875227639e-05, - "loss": 0.8199, - "step": 60960 - }, - { - "epoch": 0.5389946781237293, - "grad_norm": 10.133781433105469, - "learning_rate": 4.101675536460451e-05, - "loss": 0.6464, - "step": 60970 - }, - { - "epoch": 0.5390830813840415, - "grad_norm": 2.7149558067321777, - "learning_rate": 4.101528197693265e-05, - "loss": 0.6848, - "step": 60980 - }, - { - "epoch": 0.5391714846443537, - "grad_norm": 2.772820472717285, - "learning_rate": 4.101380858926077e-05, - "loss": 0.6317, - "step": 60990 - }, - { - "epoch": 0.539259887904666, - "grad_norm": 7.021060943603516, - "learning_rate": 4.1012335201588904e-05, - "loss": 0.6979, - "step": 61000 - }, - { - "epoch": 0.5393482911649782, - "grad_norm": 2.6324551105499268, - "learning_rate": 4.101086181391703e-05, - "loss": 0.6622, - "step": 61010 - }, - { - "epoch": 0.5394366944252904, - "grad_norm": 9.457308769226074, - "learning_rate": 4.100938842624516e-05, - "loss": 0.6594, - "step": 61020 - }, - { - "epoch": 0.5395250976856026, - "grad_norm": 5.52971076965332, - "learning_rate": 4.100791503857329e-05, - "loss": 0.7412, - "step": 61030 - }, - { - "epoch": 0.5396135009459149, - "grad_norm": 5.493051052093506, - "learning_rate": 4.1006441650901425e-05, - "loss": 0.7294, - "step": 61040 - }, - { - "epoch": 0.5397019042062271, - "grad_norm": 1.9145418405532837, - "learning_rate": 4.1004968263229546e-05, - "loss": 0.6207, - "step": 61050 - }, - { - "epoch": 0.5397903074665393, - "grad_norm": 1.7710217237472534, - "learning_rate": 4.100349487555768e-05, - "loss": 0.5974, - "step": 61060 - }, - { - "epoch": 0.5398787107268516, - "grad_norm": 7.692457675933838, - "learning_rate": 4.100202148788581e-05, - "loss": 0.5967, - "step": 61070 - }, - { - "epoch": 0.5399671139871638, - "grad_norm": 3.235154151916504, - "learning_rate": 4.100054810021394e-05, - "loss": 0.6482, - "step": 61080 - }, - { - "epoch": 0.5400555172474761, - "grad_norm": 2.170754909515381, - "learning_rate": 4.0999074712542066e-05, - "loss": 0.6941, - "step": 61090 - }, - { - "epoch": 0.5401439205077884, - "grad_norm": 1.1746199131011963, - "learning_rate": 4.0997601324870195e-05, - "loss": 0.6075, - "step": 61100 - }, - { - "epoch": 0.5402323237681006, - "grad_norm": 2.0137100219726562, - "learning_rate": 4.099612793719832e-05, - "loss": 0.6752, - "step": 61110 - }, - { - "epoch": 0.5403207270284128, - "grad_norm": 1.190150260925293, - "learning_rate": 4.099465454952646e-05, - "loss": 0.7344, - "step": 61120 - }, - { - "epoch": 0.5404091302887251, - "grad_norm": 2.0723729133605957, - "learning_rate": 4.099318116185458e-05, - "loss": 0.7581, - "step": 61130 - }, - { - "epoch": 0.5404975335490373, - "grad_norm": 2.7568132877349854, - "learning_rate": 4.0991707774182715e-05, - "loss": 0.6639, - "step": 61140 - }, - { - "epoch": 0.5405859368093495, - "grad_norm": 2.5267369747161865, - "learning_rate": 4.099023438651084e-05, - "loss": 0.7648, - "step": 61150 - }, - { - "epoch": 0.5406743400696618, - "grad_norm": 9.138727188110352, - "learning_rate": 4.098876099883897e-05, - "loss": 0.6851, - "step": 61160 - }, - { - "epoch": 0.540762743329974, - "grad_norm": 2.625595808029175, - "learning_rate": 4.09872876111671e-05, - "loss": 0.6179, - "step": 61170 - }, - { - "epoch": 0.5408511465902862, - "grad_norm": 4.42297887802124, - "learning_rate": 4.0985814223495235e-05, - "loss": 0.7064, - "step": 61180 - }, - { - "epoch": 0.5409395498505984, - "grad_norm": 9.926868438720703, - "learning_rate": 4.098434083582336e-05, - "loss": 0.6189, - "step": 61190 - }, - { - "epoch": 0.5410279531109107, - "grad_norm": 8.057879447937012, - "learning_rate": 4.098286744815149e-05, - "loss": 0.7297, - "step": 61200 - }, - { - "epoch": 0.541116356371223, - "grad_norm": 2.313573122024536, - "learning_rate": 4.098139406047961e-05, - "loss": 0.7502, - "step": 61210 - }, - { - "epoch": 0.5412047596315352, - "grad_norm": 2.662954330444336, - "learning_rate": 4.097992067280775e-05, - "loss": 0.6181, - "step": 61220 - }, - { - "epoch": 0.5412931628918475, - "grad_norm": 13.367076873779297, - "learning_rate": 4.097844728513588e-05, - "loss": 0.7337, - "step": 61230 - }, - { - "epoch": 0.5413815661521597, - "grad_norm": 7.762792110443115, - "learning_rate": 4.0976973897464005e-05, - "loss": 0.628, - "step": 61240 - }, - { - "epoch": 0.5414699694124719, - "grad_norm": 10.466089248657227, - "learning_rate": 4.0975500509792134e-05, - "loss": 0.7639, - "step": 61250 - }, - { - "epoch": 0.5415583726727842, - "grad_norm": 4.6191020011901855, - "learning_rate": 4.097402712212027e-05, - "loss": 0.7662, - "step": 61260 - }, - { - "epoch": 0.5416467759330964, - "grad_norm": 3.765852212905884, - "learning_rate": 4.097255373444839e-05, - "loss": 0.8055, - "step": 61270 - }, - { - "epoch": 0.5417351791934086, - "grad_norm": 6.5301055908203125, - "learning_rate": 4.0971080346776525e-05, - "loss": 0.6931, - "step": 61280 - }, - { - "epoch": 0.5418235824537209, - "grad_norm": 4.521273136138916, - "learning_rate": 4.0969606959104654e-05, - "loss": 0.7309, - "step": 61290 - }, - { - "epoch": 0.5419119857140331, - "grad_norm": 3.3177034854888916, - "learning_rate": 4.096813357143278e-05, - "loss": 0.7317, - "step": 61300 - }, - { - "epoch": 0.5420003889743453, - "grad_norm": 12.594902992248535, - "learning_rate": 4.096666018376091e-05, - "loss": 0.6187, - "step": 61310 - }, - { - "epoch": 0.5420887922346577, - "grad_norm": 4.101683139801025, - "learning_rate": 4.096518679608904e-05, - "loss": 0.6698, - "step": 61320 - }, - { - "epoch": 0.5421771954949699, - "grad_norm": 2.899308919906616, - "learning_rate": 4.096371340841717e-05, - "loss": 0.6732, - "step": 61330 - }, - { - "epoch": 0.5422655987552821, - "grad_norm": 14.442461013793945, - "learning_rate": 4.09622400207453e-05, - "loss": 0.7821, - "step": 61340 - }, - { - "epoch": 0.5423540020155944, - "grad_norm": 2.837376832962036, - "learning_rate": 4.096076663307343e-05, - "loss": 0.6085, - "step": 61350 - }, - { - "epoch": 0.5424424052759066, - "grad_norm": 1.6499812602996826, - "learning_rate": 4.095929324540156e-05, - "loss": 0.7831, - "step": 61360 - }, - { - "epoch": 0.5425308085362188, - "grad_norm": 3.734788656234741, - "learning_rate": 4.095781985772969e-05, - "loss": 0.7417, - "step": 61370 - }, - { - "epoch": 0.542619211796531, - "grad_norm": 5.909337043762207, - "learning_rate": 4.0956346470057816e-05, - "loss": 0.655, - "step": 61380 - }, - { - "epoch": 0.5427076150568433, - "grad_norm": 8.06618595123291, - "learning_rate": 4.0954873082385944e-05, - "loss": 0.7344, - "step": 61390 - }, - { - "epoch": 0.5427960183171555, - "grad_norm": 5.110528945922852, - "learning_rate": 4.095339969471408e-05, - "loss": 0.7057, - "step": 61400 - }, - { - "epoch": 0.5428844215774677, - "grad_norm": 5.048502445220947, - "learning_rate": 4.095192630704221e-05, - "loss": 0.6689, - "step": 61410 - }, - { - "epoch": 0.54297282483778, - "grad_norm": 2.925981044769287, - "learning_rate": 4.0950452919370336e-05, - "loss": 0.6583, - "step": 61420 - }, - { - "epoch": 0.5430612280980922, - "grad_norm": 7.1030449867248535, - "learning_rate": 4.0948979531698464e-05, - "loss": 0.4908, - "step": 61430 - }, - { - "epoch": 0.5431496313584045, - "grad_norm": 2.4644997119903564, - "learning_rate": 4.094750614402659e-05, - "loss": 0.6573, - "step": 61440 - }, - { - "epoch": 0.5432380346187168, - "grad_norm": 4.937765121459961, - "learning_rate": 4.094603275635472e-05, - "loss": 0.7287, - "step": 61450 - }, - { - "epoch": 0.543326437879029, - "grad_norm": 3.202455997467041, - "learning_rate": 4.094455936868285e-05, - "loss": 0.6487, - "step": 61460 - }, - { - "epoch": 0.5434148411393412, - "grad_norm": 2.912321090698242, - "learning_rate": 4.0943085981010984e-05, - "loss": 0.6049, - "step": 61470 - }, - { - "epoch": 0.5435032443996535, - "grad_norm": 8.38735294342041, - "learning_rate": 4.094161259333911e-05, - "loss": 0.6741, - "step": 61480 - }, - { - "epoch": 0.5435916476599657, - "grad_norm": 5.502791404724121, - "learning_rate": 4.094013920566724e-05, - "loss": 0.7838, - "step": 61490 - }, - { - "epoch": 0.5436800509202779, - "grad_norm": 4.26815938949585, - "learning_rate": 4.093866581799537e-05, - "loss": 0.7441, - "step": 61500 - }, - { - "epoch": 0.5437684541805902, - "grad_norm": 0.7829049825668335, - "learning_rate": 4.09371924303235e-05, - "loss": 0.6335, - "step": 61510 - }, - { - "epoch": 0.5438568574409024, - "grad_norm": 4.094738483428955, - "learning_rate": 4.0935719042651626e-05, - "loss": 0.6195, - "step": 61520 - }, - { - "epoch": 0.5439452607012146, - "grad_norm": 7.83526611328125, - "learning_rate": 4.093424565497976e-05, - "loss": 0.6159, - "step": 61530 - }, - { - "epoch": 0.5440336639615269, - "grad_norm": 4.799098014831543, - "learning_rate": 4.093277226730789e-05, - "loss": 0.6435, - "step": 61540 - }, - { - "epoch": 0.5441220672218391, - "grad_norm": 2.3341596126556396, - "learning_rate": 4.093129887963602e-05, - "loss": 0.656, - "step": 61550 - }, - { - "epoch": 0.5442104704821514, - "grad_norm": 7.418471336364746, - "learning_rate": 4.0929825491964146e-05, - "loss": 0.6934, - "step": 61560 - }, - { - "epoch": 0.5442988737424637, - "grad_norm": 8.626866340637207, - "learning_rate": 4.0928352104292275e-05, - "loss": 0.6294, - "step": 61570 - }, - { - "epoch": 0.5443872770027759, - "grad_norm": 4.980144500732422, - "learning_rate": 4.09268787166204e-05, - "loss": 0.7899, - "step": 61580 - }, - { - "epoch": 0.5444756802630881, - "grad_norm": 2.441241979598999, - "learning_rate": 4.092540532894854e-05, - "loss": 0.7266, - "step": 61590 - }, - { - "epoch": 0.5445640835234004, - "grad_norm": 4.767135143280029, - "learning_rate": 4.092393194127666e-05, - "loss": 0.6007, - "step": 61600 - }, - { - "epoch": 0.5446524867837126, - "grad_norm": 6.329469203948975, - "learning_rate": 4.0922458553604795e-05, - "loss": 0.5614, - "step": 61610 - }, - { - "epoch": 0.5447408900440248, - "grad_norm": 4.289069175720215, - "learning_rate": 4.092098516593292e-05, - "loss": 0.7063, - "step": 61620 - }, - { - "epoch": 0.544829293304337, - "grad_norm": 4.172220706939697, - "learning_rate": 4.091951177826105e-05, - "loss": 0.6329, - "step": 61630 - }, - { - "epoch": 0.5449176965646493, - "grad_norm": 1.5058437585830688, - "learning_rate": 4.091803839058918e-05, - "loss": 0.5715, - "step": 61640 - }, - { - "epoch": 0.5450060998249615, - "grad_norm": 1.9413853883743286, - "learning_rate": 4.0916565002917315e-05, - "loss": 0.643, - "step": 61650 - }, - { - "epoch": 0.5450945030852737, - "grad_norm": 2.87971568107605, - "learning_rate": 4.091509161524544e-05, - "loss": 0.6928, - "step": 61660 - }, - { - "epoch": 0.545182906345586, - "grad_norm": 8.396600723266602, - "learning_rate": 4.091361822757357e-05, - "loss": 0.7239, - "step": 61670 - }, - { - "epoch": 0.5452713096058983, - "grad_norm": 9.455760955810547, - "learning_rate": 4.0912144839901693e-05, - "loss": 0.7265, - "step": 61680 - }, - { - "epoch": 0.5453597128662105, - "grad_norm": 2.3186399936676025, - "learning_rate": 4.091067145222983e-05, - "loss": 0.6015, - "step": 61690 - }, - { - "epoch": 0.5454481161265228, - "grad_norm": 5.35010290145874, - "learning_rate": 4.090919806455796e-05, - "loss": 0.6971, - "step": 61700 - }, - { - "epoch": 0.545536519386835, - "grad_norm": 2.6566903591156006, - "learning_rate": 4.0907724676886085e-05, - "loss": 0.7582, - "step": 61710 - }, - { - "epoch": 0.5456249226471472, - "grad_norm": 3.0584723949432373, - "learning_rate": 4.0906251289214214e-05, - "loss": 0.7132, - "step": 61720 - }, - { - "epoch": 0.5457133259074595, - "grad_norm": 6.962405681610107, - "learning_rate": 4.090477790154235e-05, - "loss": 0.6459, - "step": 61730 - }, - { - "epoch": 0.5458017291677717, - "grad_norm": 7.146580219268799, - "learning_rate": 4.090330451387047e-05, - "loss": 0.696, - "step": 61740 - }, - { - "epoch": 0.5458901324280839, - "grad_norm": 1.7345503568649292, - "learning_rate": 4.0901831126198605e-05, - "loss": 0.5787, - "step": 61750 - }, - { - "epoch": 0.5459785356883962, - "grad_norm": 2.4765796661376953, - "learning_rate": 4.0900357738526734e-05, - "loss": 0.6041, - "step": 61760 - }, - { - "epoch": 0.5460669389487084, - "grad_norm": 7.141934394836426, - "learning_rate": 4.089888435085486e-05, - "loss": 0.6972, - "step": 61770 - }, - { - "epoch": 0.5461553422090206, - "grad_norm": 6.15211820602417, - "learning_rate": 4.089741096318299e-05, - "loss": 0.7501, - "step": 61780 - }, - { - "epoch": 0.546243745469333, - "grad_norm": 9.471246719360352, - "learning_rate": 4.089593757551112e-05, - "loss": 0.6583, - "step": 61790 - }, - { - "epoch": 0.5463321487296452, - "grad_norm": 2.117295265197754, - "learning_rate": 4.089446418783925e-05, - "loss": 0.7657, - "step": 61800 - }, - { - "epoch": 0.5464205519899574, - "grad_norm": 2.312269926071167, - "learning_rate": 4.089299080016738e-05, - "loss": 0.7321, - "step": 61810 - }, - { - "epoch": 0.5465089552502697, - "grad_norm": 5.327282905578613, - "learning_rate": 4.0891517412495504e-05, - "loss": 0.713, - "step": 61820 - }, - { - "epoch": 0.5465973585105819, - "grad_norm": 4.53051233291626, - "learning_rate": 4.089004402482364e-05, - "loss": 0.6507, - "step": 61830 - }, - { - "epoch": 0.5466857617708941, - "grad_norm": 4.317416191101074, - "learning_rate": 4.088857063715177e-05, - "loss": 0.6268, - "step": 61840 - }, - { - "epoch": 0.5467741650312063, - "grad_norm": 4.29363489151001, - "learning_rate": 4.0887097249479896e-05, - "loss": 0.7671, - "step": 61850 - }, - { - "epoch": 0.5468625682915186, - "grad_norm": 2.106245517730713, - "learning_rate": 4.0885623861808024e-05, - "loss": 0.6954, - "step": 61860 - }, - { - "epoch": 0.5469509715518308, - "grad_norm": 4.963006496429443, - "learning_rate": 4.088415047413616e-05, - "loss": 0.5751, - "step": 61870 - }, - { - "epoch": 0.547039374812143, - "grad_norm": 3.012784957885742, - "learning_rate": 4.088267708646428e-05, - "loss": 0.5432, - "step": 61880 - }, - { - "epoch": 0.5471277780724553, - "grad_norm": 2.363640069961548, - "learning_rate": 4.0881203698792416e-05, - "loss": 0.6364, - "step": 61890 - }, - { - "epoch": 0.5472161813327675, - "grad_norm": 4.916860103607178, - "learning_rate": 4.0879730311120544e-05, - "loss": 0.7315, - "step": 61900 - }, - { - "epoch": 0.5473045845930798, - "grad_norm": 13.727585792541504, - "learning_rate": 4.087825692344867e-05, - "loss": 0.837, - "step": 61910 - }, - { - "epoch": 0.5473929878533921, - "grad_norm": 3.6278133392333984, - "learning_rate": 4.08767835357768e-05, - "loss": 0.745, - "step": 61920 - }, - { - "epoch": 0.5474813911137043, - "grad_norm": 4.7713775634765625, - "learning_rate": 4.087531014810493e-05, - "loss": 0.6926, - "step": 61930 - }, - { - "epoch": 0.5475697943740165, - "grad_norm": 4.205535411834717, - "learning_rate": 4.087383676043306e-05, - "loss": 0.59, - "step": 61940 - }, - { - "epoch": 0.5476581976343288, - "grad_norm": 4.933831214904785, - "learning_rate": 4.087236337276119e-05, - "loss": 0.7224, - "step": 61950 - }, - { - "epoch": 0.547746600894641, - "grad_norm": 5.320747375488281, - "learning_rate": 4.0870889985089314e-05, - "loss": 0.7419, - "step": 61960 - }, - { - "epoch": 0.5478350041549532, - "grad_norm": 2.2930240631103516, - "learning_rate": 4.086941659741745e-05, - "loss": 0.6866, - "step": 61970 - }, - { - "epoch": 0.5479234074152655, - "grad_norm": 2.417546033859253, - "learning_rate": 4.086794320974558e-05, - "loss": 0.6766, - "step": 61980 - }, - { - "epoch": 0.5480118106755777, - "grad_norm": 4.099992275238037, - "learning_rate": 4.0866469822073706e-05, - "loss": 0.7691, - "step": 61990 - }, - { - "epoch": 0.5481002139358899, - "grad_norm": 7.419407844543457, - "learning_rate": 4.0864996434401835e-05, - "loss": 0.5728, - "step": 62000 - }, - { - "epoch": 0.5481886171962022, - "grad_norm": 3.2243587970733643, - "learning_rate": 4.086352304672997e-05, - "loss": 0.7734, - "step": 62010 - }, - { - "epoch": 0.5482770204565144, - "grad_norm": 3.938995599746704, - "learning_rate": 4.086204965905809e-05, - "loss": 0.6605, - "step": 62020 - }, - { - "epoch": 0.5483654237168267, - "grad_norm": 4.894810676574707, - "learning_rate": 4.0860576271386226e-05, - "loss": 0.8069, - "step": 62030 - }, - { - "epoch": 0.548453826977139, - "grad_norm": 1.3692402839660645, - "learning_rate": 4.085910288371435e-05, - "loss": 0.6191, - "step": 62040 - }, - { - "epoch": 0.5485422302374512, - "grad_norm": 7.836918354034424, - "learning_rate": 4.085762949604248e-05, - "loss": 0.7416, - "step": 62050 - }, - { - "epoch": 0.5486306334977634, - "grad_norm": 3.5874412059783936, - "learning_rate": 4.085615610837061e-05, - "loss": 0.8542, - "step": 62060 - }, - { - "epoch": 0.5487190367580757, - "grad_norm": 1.5806734561920166, - "learning_rate": 4.085468272069874e-05, - "loss": 0.6764, - "step": 62070 - }, - { - "epoch": 0.5488074400183879, - "grad_norm": 1.5705277919769287, - "learning_rate": 4.085320933302687e-05, - "loss": 0.5837, - "step": 62080 - }, - { - "epoch": 0.5488958432787001, - "grad_norm": 2.244008779525757, - "learning_rate": 4.0851735945355e-05, - "loss": 0.662, - "step": 62090 - }, - { - "epoch": 0.5489842465390123, - "grad_norm": 1.9295954704284668, - "learning_rate": 4.0850262557683125e-05, - "loss": 0.6677, - "step": 62100 - }, - { - "epoch": 0.5490726497993246, - "grad_norm": 4.382631778717041, - "learning_rate": 4.084878917001126e-05, - "loss": 0.5893, - "step": 62110 - }, - { - "epoch": 0.5491610530596368, - "grad_norm": 4.301537036895752, - "learning_rate": 4.084731578233939e-05, - "loss": 0.6719, - "step": 62120 - }, - { - "epoch": 0.549249456319949, - "grad_norm": 4.617231369018555, - "learning_rate": 4.084584239466752e-05, - "loss": 0.8648, - "step": 62130 - }, - { - "epoch": 0.5493378595802613, - "grad_norm": 4.951000690460205, - "learning_rate": 4.0844369006995645e-05, - "loss": 0.7484, - "step": 62140 - }, - { - "epoch": 0.5494262628405736, - "grad_norm": 3.792289972305298, - "learning_rate": 4.0842895619323773e-05, - "loss": 0.6072, - "step": 62150 - }, - { - "epoch": 0.5495146661008858, - "grad_norm": 5.221554279327393, - "learning_rate": 4.08414222316519e-05, - "loss": 0.8724, - "step": 62160 - }, - { - "epoch": 0.5496030693611981, - "grad_norm": 4.404388904571533, - "learning_rate": 4.083994884398004e-05, - "loss": 0.7609, - "step": 62170 - }, - { - "epoch": 0.5496914726215103, - "grad_norm": 3.834625482559204, - "learning_rate": 4.083847545630816e-05, - "loss": 0.6846, - "step": 62180 - }, - { - "epoch": 0.5497798758818225, - "grad_norm": 1.522938847541809, - "learning_rate": 4.0837002068636294e-05, - "loss": 0.7164, - "step": 62190 - }, - { - "epoch": 0.5498682791421348, - "grad_norm": 1.1847554445266724, - "learning_rate": 4.083552868096442e-05, - "loss": 0.7044, - "step": 62200 - }, - { - "epoch": 0.549956682402447, - "grad_norm": 2.3974757194519043, - "learning_rate": 4.083405529329255e-05, - "loss": 0.6138, - "step": 62210 - }, - { - "epoch": 0.5500450856627592, - "grad_norm": 1.6963533163070679, - "learning_rate": 4.083258190562068e-05, - "loss": 0.6669, - "step": 62220 - }, - { - "epoch": 0.5501334889230715, - "grad_norm": 4.234209060668945, - "learning_rate": 4.0831108517948814e-05, - "loss": 0.6213, - "step": 62230 - }, - { - "epoch": 0.5502218921833837, - "grad_norm": 4.611056327819824, - "learning_rate": 4.0829635130276935e-05, - "loss": 0.6935, - "step": 62240 - }, - { - "epoch": 0.5503102954436959, - "grad_norm": 3.103215456008911, - "learning_rate": 4.082816174260507e-05, - "loss": 0.7327, - "step": 62250 - }, - { - "epoch": 0.5503986987040081, - "grad_norm": 3.6425230503082275, - "learning_rate": 4.08266883549332e-05, - "loss": 0.6821, - "step": 62260 - }, - { - "epoch": 0.5504871019643205, - "grad_norm": 14.899616241455078, - "learning_rate": 4.082521496726133e-05, - "loss": 0.7051, - "step": 62270 - }, - { - "epoch": 0.5505755052246327, - "grad_norm": 1.558464765548706, - "learning_rate": 4.0823741579589456e-05, - "loss": 0.7049, - "step": 62280 - }, - { - "epoch": 0.550663908484945, - "grad_norm": 1.8028136491775513, - "learning_rate": 4.0822268191917584e-05, - "loss": 0.7035, - "step": 62290 - }, - { - "epoch": 0.5507523117452572, - "grad_norm": 1.6013673543930054, - "learning_rate": 4.082079480424571e-05, - "loss": 0.8175, - "step": 62300 - }, - { - "epoch": 0.5508407150055694, - "grad_norm": 2.7481155395507812, - "learning_rate": 4.081932141657385e-05, - "loss": 0.6141, - "step": 62310 - }, - { - "epoch": 0.5509291182658816, - "grad_norm": 1.3825974464416504, - "learning_rate": 4.0817848028901976e-05, - "loss": 0.6164, - "step": 62320 - }, - { - "epoch": 0.5510175215261939, - "grad_norm": 3.1612820625305176, - "learning_rate": 4.0816374641230104e-05, - "loss": 0.671, - "step": 62330 - }, - { - "epoch": 0.5511059247865061, - "grad_norm": 7.41900110244751, - "learning_rate": 4.081490125355823e-05, - "loss": 0.8493, - "step": 62340 - }, - { - "epoch": 0.5511943280468183, - "grad_norm": 5.661056041717529, - "learning_rate": 4.081342786588636e-05, - "loss": 0.8018, - "step": 62350 - }, - { - "epoch": 0.5512827313071306, - "grad_norm": 3.1594958305358887, - "learning_rate": 4.081195447821449e-05, - "loss": 0.5826, - "step": 62360 - }, - { - "epoch": 0.5513711345674428, - "grad_norm": 3.1781249046325684, - "learning_rate": 4.0810481090542624e-05, - "loss": 0.6851, - "step": 62370 - }, - { - "epoch": 0.5514595378277551, - "grad_norm": 10.142784118652344, - "learning_rate": 4.080900770287075e-05, - "loss": 0.7852, - "step": 62380 - }, - { - "epoch": 0.5515479410880674, - "grad_norm": 6.739329814910889, - "learning_rate": 4.080753431519888e-05, - "loss": 0.6094, - "step": 62390 - }, - { - "epoch": 0.5516363443483796, - "grad_norm": 29.661962509155273, - "learning_rate": 4.080606092752701e-05, - "loss": 0.7518, - "step": 62400 - }, - { - "epoch": 0.5517247476086918, - "grad_norm": 7.815852642059326, - "learning_rate": 4.080458753985514e-05, - "loss": 0.6028, - "step": 62410 - }, - { - "epoch": 0.5518131508690041, - "grad_norm": 6.031912326812744, - "learning_rate": 4.0803114152183266e-05, - "loss": 0.6138, - "step": 62420 - }, - { - "epoch": 0.5519015541293163, - "grad_norm": 3.4759647846221924, - "learning_rate": 4.0801640764511394e-05, - "loss": 0.7457, - "step": 62430 - }, - { - "epoch": 0.5519899573896285, - "grad_norm": 2.1413931846618652, - "learning_rate": 4.080016737683953e-05, - "loss": 0.6812, - "step": 62440 - }, - { - "epoch": 0.5520783606499408, - "grad_norm": 14.791672706604004, - "learning_rate": 4.079869398916766e-05, - "loss": 0.7169, - "step": 62450 - }, - { - "epoch": 0.552166763910253, - "grad_norm": 3.942746162414551, - "learning_rate": 4.0797220601495786e-05, - "loss": 0.5147, - "step": 62460 - }, - { - "epoch": 0.5522551671705652, - "grad_norm": 3.358020067214966, - "learning_rate": 4.0795747213823915e-05, - "loss": 0.7153, - "step": 62470 - }, - { - "epoch": 0.5523435704308775, - "grad_norm": 1.729537844657898, - "learning_rate": 4.079427382615204e-05, - "loss": 0.7613, - "step": 62480 - }, - { - "epoch": 0.5524319736911897, - "grad_norm": 2.4452719688415527, - "learning_rate": 4.079280043848017e-05, - "loss": 0.5938, - "step": 62490 - }, - { - "epoch": 0.552520376951502, - "grad_norm": 4.644379138946533, - "learning_rate": 4.0791327050808307e-05, - "loss": 0.7941, - "step": 62500 - }, - { - "epoch": 0.5526087802118143, - "grad_norm": 5.547428607940674, - "learning_rate": 4.078985366313643e-05, - "loss": 0.7711, - "step": 62510 - }, - { - "epoch": 0.5526971834721265, - "grad_norm": 1.1845346689224243, - "learning_rate": 4.078838027546456e-05, - "loss": 0.6581, - "step": 62520 - }, - { - "epoch": 0.5527855867324387, - "grad_norm": 4.600733757019043, - "learning_rate": 4.078690688779269e-05, - "loss": 0.7008, - "step": 62530 - }, - { - "epoch": 0.552873989992751, - "grad_norm": 1.6078307628631592, - "learning_rate": 4.078543350012082e-05, - "loss": 0.678, - "step": 62540 - }, - { - "epoch": 0.5529623932530632, - "grad_norm": 2.8887579441070557, - "learning_rate": 4.078396011244895e-05, - "loss": 0.7163, - "step": 62550 - }, - { - "epoch": 0.5530507965133754, - "grad_norm": 13.506646156311035, - "learning_rate": 4.0782486724777083e-05, - "loss": 0.7312, - "step": 62560 - }, - { - "epoch": 0.5531391997736876, - "grad_norm": 3.7088615894317627, - "learning_rate": 4.0781013337105205e-05, - "loss": 0.6049, - "step": 62570 - }, - { - "epoch": 0.5532276030339999, - "grad_norm": 4.431879043579102, - "learning_rate": 4.077953994943334e-05, - "loss": 0.8565, - "step": 62580 - }, - { - "epoch": 0.5533160062943121, - "grad_norm": 4.177177429199219, - "learning_rate": 4.077806656176147e-05, - "loss": 0.7676, - "step": 62590 - }, - { - "epoch": 0.5534044095546243, - "grad_norm": 3.7162039279937744, - "learning_rate": 4.07765931740896e-05, - "loss": 0.6364, - "step": 62600 - }, - { - "epoch": 0.5534928128149366, - "grad_norm": 2.4209091663360596, - "learning_rate": 4.0775119786417725e-05, - "loss": 0.737, - "step": 62610 - }, - { - "epoch": 0.5535812160752489, - "grad_norm": 2.490230083465576, - "learning_rate": 4.0773646398745854e-05, - "loss": 0.6825, - "step": 62620 - }, - { - "epoch": 0.5536696193355611, - "grad_norm": 5.82399320602417, - "learning_rate": 4.077217301107398e-05, - "loss": 0.7084, - "step": 62630 - }, - { - "epoch": 0.5537580225958734, - "grad_norm": 3.0212762355804443, - "learning_rate": 4.077069962340212e-05, - "loss": 0.6651, - "step": 62640 - }, - { - "epoch": 0.5538464258561856, - "grad_norm": 3.4942400455474854, - "learning_rate": 4.076922623573024e-05, - "loss": 0.7842, - "step": 62650 - }, - { - "epoch": 0.5539348291164978, - "grad_norm": 2.226706027984619, - "learning_rate": 4.0767752848058374e-05, - "loss": 0.591, - "step": 62660 - }, - { - "epoch": 0.5540232323768101, - "grad_norm": 3.081984281539917, - "learning_rate": 4.07662794603865e-05, - "loss": 0.6871, - "step": 62670 - }, - { - "epoch": 0.5541116356371223, - "grad_norm": 4.682066917419434, - "learning_rate": 4.076480607271463e-05, - "loss": 0.615, - "step": 62680 - }, - { - "epoch": 0.5542000388974345, - "grad_norm": 4.302504539489746, - "learning_rate": 4.076333268504276e-05, - "loss": 0.8338, - "step": 62690 - }, - { - "epoch": 0.5542884421577468, - "grad_norm": 3.471498727798462, - "learning_rate": 4.0761859297370894e-05, - "loss": 0.8216, - "step": 62700 - }, - { - "epoch": 0.554376845418059, - "grad_norm": 5.331011772155762, - "learning_rate": 4.0760385909699015e-05, - "loss": 0.6846, - "step": 62710 - }, - { - "epoch": 0.5544652486783712, - "grad_norm": 2.4722280502319336, - "learning_rate": 4.075891252202715e-05, - "loss": 0.6023, - "step": 62720 - }, - { - "epoch": 0.5545536519386834, - "grad_norm": 2.3275868892669678, - "learning_rate": 4.075743913435528e-05, - "loss": 0.6967, - "step": 62730 - }, - { - "epoch": 0.5546420551989958, - "grad_norm": 3.251068592071533, - "learning_rate": 4.075596574668341e-05, - "loss": 0.6167, - "step": 62740 - }, - { - "epoch": 0.554730458459308, - "grad_norm": 3.044917345046997, - "learning_rate": 4.0754492359011536e-05, - "loss": 0.7082, - "step": 62750 - }, - { - "epoch": 0.5548188617196202, - "grad_norm": 5.043300151824951, - "learning_rate": 4.0753018971339664e-05, - "loss": 0.7656, - "step": 62760 - }, - { - "epoch": 0.5549072649799325, - "grad_norm": 2.0907504558563232, - "learning_rate": 4.075154558366779e-05, - "loss": 0.7462, - "step": 62770 - }, - { - "epoch": 0.5549956682402447, - "grad_norm": 3.9822773933410645, - "learning_rate": 4.075007219599593e-05, - "loss": 0.6726, - "step": 62780 - }, - { - "epoch": 0.5550840715005569, - "grad_norm": 6.933872222900391, - "learning_rate": 4.074859880832405e-05, - "loss": 0.6487, - "step": 62790 - }, - { - "epoch": 0.5551724747608692, - "grad_norm": 3.5690855979919434, - "learning_rate": 4.0747125420652184e-05, - "loss": 0.6857, - "step": 62800 - }, - { - "epoch": 0.5552608780211814, - "grad_norm": 3.5433759689331055, - "learning_rate": 4.074565203298031e-05, - "loss": 0.6341, - "step": 62810 - }, - { - "epoch": 0.5553492812814936, - "grad_norm": 3.214184284210205, - "learning_rate": 4.074417864530844e-05, - "loss": 0.6289, - "step": 62820 - }, - { - "epoch": 0.5554376845418059, - "grad_norm": 4.199161529541016, - "learning_rate": 4.074270525763657e-05, - "loss": 0.7053, - "step": 62830 - }, - { - "epoch": 0.5555260878021181, - "grad_norm": 2.815532684326172, - "learning_rate": 4.0741231869964704e-05, - "loss": 0.7036, - "step": 62840 - }, - { - "epoch": 0.5556144910624304, - "grad_norm": 4.300614833831787, - "learning_rate": 4.0739758482292826e-05, - "loss": 0.7452, - "step": 62850 - }, - { - "epoch": 0.5557028943227427, - "grad_norm": 1.6825121641159058, - "learning_rate": 4.073828509462096e-05, - "loss": 0.6978, - "step": 62860 - }, - { - "epoch": 0.5557912975830549, - "grad_norm": 7.21452522277832, - "learning_rate": 4.073681170694908e-05, - "loss": 0.618, - "step": 62870 - }, - { - "epoch": 0.5558797008433671, - "grad_norm": 3.448690176010132, - "learning_rate": 4.073533831927722e-05, - "loss": 0.5747, - "step": 62880 - }, - { - "epoch": 0.5559681041036794, - "grad_norm": 3.577071189880371, - "learning_rate": 4.0733864931605346e-05, - "loss": 0.6362, - "step": 62890 - }, - { - "epoch": 0.5560565073639916, - "grad_norm": 2.1710007190704346, - "learning_rate": 4.0732391543933475e-05, - "loss": 0.622, - "step": 62900 - }, - { - "epoch": 0.5561449106243038, - "grad_norm": 2.937624931335449, - "learning_rate": 4.07309181562616e-05, - "loss": 0.6407, - "step": 62910 - }, - { - "epoch": 0.556233313884616, - "grad_norm": 3.720982313156128, - "learning_rate": 4.072944476858974e-05, - "loss": 0.6316, - "step": 62920 - }, - { - "epoch": 0.5563217171449283, - "grad_norm": 1.773363709449768, - "learning_rate": 4.072797138091786e-05, - "loss": 0.5862, - "step": 62930 - }, - { - "epoch": 0.5564101204052405, - "grad_norm": 2.2955081462860107, - "learning_rate": 4.0726497993245995e-05, - "loss": 0.7583, - "step": 62940 - }, - { - "epoch": 0.5564985236655527, - "grad_norm": 7.422618389129639, - "learning_rate": 4.072502460557412e-05, - "loss": 0.7207, - "step": 62950 - }, - { - "epoch": 0.556586926925865, - "grad_norm": 2.2737765312194824, - "learning_rate": 4.072355121790225e-05, - "loss": 0.6888, - "step": 62960 - }, - { - "epoch": 0.5566753301861773, - "grad_norm": 1.7188626527786255, - "learning_rate": 4.072207783023038e-05, - "loss": 0.7744, - "step": 62970 - }, - { - "epoch": 0.5567637334464896, - "grad_norm": 7.6729044914245605, - "learning_rate": 4.072060444255851e-05, - "loss": 0.6764, - "step": 62980 - }, - { - "epoch": 0.5568521367068018, - "grad_norm": 2.308917284011841, - "learning_rate": 4.0719131054886637e-05, - "loss": 0.6571, - "step": 62990 - }, - { - "epoch": 0.556940539967114, - "grad_norm": 2.4506571292877197, - "learning_rate": 4.071765766721477e-05, - "loss": 0.5771, - "step": 63000 - }, - { - "epoch": 0.5570289432274262, - "grad_norm": 3.898029088973999, - "learning_rate": 4.071618427954289e-05, - "loss": 0.6696, - "step": 63010 - }, - { - "epoch": 0.5571173464877385, - "grad_norm": 7.187760353088379, - "learning_rate": 4.071471089187103e-05, - "loss": 0.6918, - "step": 63020 - }, - { - "epoch": 0.5572057497480507, - "grad_norm": 6.468751430511475, - "learning_rate": 4.071323750419916e-05, - "loss": 0.6641, - "step": 63030 - }, - { - "epoch": 0.5572941530083629, - "grad_norm": 3.098630666732788, - "learning_rate": 4.0711764116527285e-05, - "loss": 0.683, - "step": 63040 - }, - { - "epoch": 0.5573825562686752, - "grad_norm": 5.286598205566406, - "learning_rate": 4.0710290728855413e-05, - "loss": 0.7044, - "step": 63050 - }, - { - "epoch": 0.5574709595289874, - "grad_norm": 2.019216775894165, - "learning_rate": 4.070881734118355e-05, - "loss": 0.7049, - "step": 63060 - }, - { - "epoch": 0.5575593627892996, - "grad_norm": 4.356855392456055, - "learning_rate": 4.070734395351167e-05, - "loss": 0.8097, - "step": 63070 - }, - { - "epoch": 0.5576477660496119, - "grad_norm": 5.114986419677734, - "learning_rate": 4.0705870565839805e-05, - "loss": 0.8909, - "step": 63080 - }, - { - "epoch": 0.5577361693099242, - "grad_norm": 1.8002680540084839, - "learning_rate": 4.070439717816793e-05, - "loss": 0.6946, - "step": 63090 - }, - { - "epoch": 0.5578245725702364, - "grad_norm": 4.779249668121338, - "learning_rate": 4.070292379049606e-05, - "loss": 0.7003, - "step": 63100 - }, - { - "epoch": 0.5579129758305487, - "grad_norm": 17.739381790161133, - "learning_rate": 4.070145040282419e-05, - "loss": 0.6996, - "step": 63110 - }, - { - "epoch": 0.5580013790908609, - "grad_norm": 4.2556071281433105, - "learning_rate": 4.069997701515232e-05, - "loss": 0.7975, - "step": 63120 - }, - { - "epoch": 0.5580897823511731, - "grad_norm": 7.880222320556641, - "learning_rate": 4.069850362748045e-05, - "loss": 0.6855, - "step": 63130 - }, - { - "epoch": 0.5581781856114854, - "grad_norm": 3.777540683746338, - "learning_rate": 4.069703023980858e-05, - "loss": 0.7618, - "step": 63140 - }, - { - "epoch": 0.5582665888717976, - "grad_norm": 2.8185460567474365, - "learning_rate": 4.0695556852136704e-05, - "loss": 0.7183, - "step": 63150 - }, - { - "epoch": 0.5583549921321098, - "grad_norm": 2.2345547676086426, - "learning_rate": 4.069408346446484e-05, - "loss": 0.7175, - "step": 63160 - }, - { - "epoch": 0.558443395392422, - "grad_norm": 2.3325679302215576, - "learning_rate": 4.069261007679297e-05, - "loss": 0.5852, - "step": 63170 - }, - { - "epoch": 0.5585317986527343, - "grad_norm": 2.6563258171081543, - "learning_rate": 4.0691136689121096e-05, - "loss": 0.723, - "step": 63180 - }, - { - "epoch": 0.5586202019130465, - "grad_norm": 3.589921712875366, - "learning_rate": 4.0689663301449224e-05, - "loss": 0.7361, - "step": 63190 - }, - { - "epoch": 0.5587086051733587, - "grad_norm": 3.861968994140625, - "learning_rate": 4.068818991377736e-05, - "loss": 0.724, - "step": 63200 - }, - { - "epoch": 0.5587970084336711, - "grad_norm": 8.236564636230469, - "learning_rate": 4.068671652610548e-05, - "loss": 0.675, - "step": 63210 - }, - { - "epoch": 0.5588854116939833, - "grad_norm": 1.6007232666015625, - "learning_rate": 4.0685243138433616e-05, - "loss": 0.7619, - "step": 63220 - }, - { - "epoch": 0.5589738149542955, - "grad_norm": 8.26694393157959, - "learning_rate": 4.0683769750761744e-05, - "loss": 0.7679, - "step": 63230 - }, - { - "epoch": 0.5590622182146078, - "grad_norm": 1.9998350143432617, - "learning_rate": 4.068229636308987e-05, - "loss": 0.7287, - "step": 63240 - }, - { - "epoch": 0.55915062147492, - "grad_norm": 4.671010494232178, - "learning_rate": 4.0680822975418e-05, - "loss": 0.6734, - "step": 63250 - }, - { - "epoch": 0.5592390247352322, - "grad_norm": 1.9065940380096436, - "learning_rate": 4.067934958774613e-05, - "loss": 0.7706, - "step": 63260 - }, - { - "epoch": 0.5593274279955445, - "grad_norm": 8.396730422973633, - "learning_rate": 4.067787620007426e-05, - "loss": 0.7225, - "step": 63270 - }, - { - "epoch": 0.5594158312558567, - "grad_norm": 3.9346113204956055, - "learning_rate": 4.067640281240239e-05, - "loss": 0.7234, - "step": 63280 - }, - { - "epoch": 0.5595042345161689, - "grad_norm": 6.4728803634643555, - "learning_rate": 4.067492942473052e-05, - "loss": 0.859, - "step": 63290 - }, - { - "epoch": 0.5595926377764812, - "grad_norm": 6.107116222381592, - "learning_rate": 4.067345603705865e-05, - "loss": 0.7255, - "step": 63300 - }, - { - "epoch": 0.5596810410367934, - "grad_norm": 2.462620973587036, - "learning_rate": 4.067198264938678e-05, - "loss": 0.7193, - "step": 63310 - }, - { - "epoch": 0.5597694442971056, - "grad_norm": 1.311640977859497, - "learning_rate": 4.0670509261714906e-05, - "loss": 0.679, - "step": 63320 - }, - { - "epoch": 0.559857847557418, - "grad_norm": 3.0472991466522217, - "learning_rate": 4.0669035874043034e-05, - "loss": 0.6367, - "step": 63330 - }, - { - "epoch": 0.5599462508177302, - "grad_norm": 3.4147191047668457, - "learning_rate": 4.066756248637116e-05, - "loss": 0.5648, - "step": 63340 - }, - { - "epoch": 0.5600346540780424, - "grad_norm": 5.423051357269287, - "learning_rate": 4.06660890986993e-05, - "loss": 0.5828, - "step": 63350 - }, - { - "epoch": 0.5601230573383547, - "grad_norm": 0.9590580463409424, - "learning_rate": 4.0664615711027426e-05, - "loss": 0.6916, - "step": 63360 - }, - { - "epoch": 0.5602114605986669, - "grad_norm": 1.4947302341461182, - "learning_rate": 4.0663142323355555e-05, - "loss": 0.6561, - "step": 63370 - }, - { - "epoch": 0.5602998638589791, - "grad_norm": 4.569432258605957, - "learning_rate": 4.066166893568368e-05, - "loss": 0.7948, - "step": 63380 - }, - { - "epoch": 0.5603882671192914, - "grad_norm": 1.5504459142684937, - "learning_rate": 4.066019554801181e-05, - "loss": 0.6398, - "step": 63390 - }, - { - "epoch": 0.5604766703796036, - "grad_norm": 10.235306739807129, - "learning_rate": 4.065872216033994e-05, - "loss": 0.7731, - "step": 63400 - }, - { - "epoch": 0.5605650736399158, - "grad_norm": 1.8039636611938477, - "learning_rate": 4.0657248772668075e-05, - "loss": 0.636, - "step": 63410 - }, - { - "epoch": 0.560653476900228, - "grad_norm": 5.058978080749512, - "learning_rate": 4.06557753849962e-05, - "loss": 0.83, - "step": 63420 - }, - { - "epoch": 0.5607418801605403, - "grad_norm": 7.112691402435303, - "learning_rate": 4.065430199732433e-05, - "loss": 0.6408, - "step": 63430 - }, - { - "epoch": 0.5608302834208526, - "grad_norm": 6.672459125518799, - "learning_rate": 4.065282860965246e-05, - "loss": 0.7014, - "step": 63440 - }, - { - "epoch": 0.5609186866811648, - "grad_norm": 6.996494293212891, - "learning_rate": 4.065135522198059e-05, - "loss": 0.6675, - "step": 63450 - }, - { - "epoch": 0.5610070899414771, - "grad_norm": 16.02535629272461, - "learning_rate": 4.0649881834308717e-05, - "loss": 0.621, - "step": 63460 - }, - { - "epoch": 0.5610954932017893, - "grad_norm": 3.3546600341796875, - "learning_rate": 4.064840844663685e-05, - "loss": 0.6007, - "step": 63470 - }, - { - "epoch": 0.5611838964621015, - "grad_norm": 7.53946590423584, - "learning_rate": 4.064693505896497e-05, - "loss": 0.7236, - "step": 63480 - }, - { - "epoch": 0.5612722997224138, - "grad_norm": 6.919145584106445, - "learning_rate": 4.064546167129311e-05, - "loss": 0.773, - "step": 63490 - }, - { - "epoch": 0.561360702982726, - "grad_norm": 12.32744026184082, - "learning_rate": 4.064398828362124e-05, - "loss": 0.7469, - "step": 63500 - }, - { - "epoch": 0.5614491062430382, - "grad_norm": 3.5610527992248535, - "learning_rate": 4.0642514895949365e-05, - "loss": 0.7002, - "step": 63510 - }, - { - "epoch": 0.5615375095033505, - "grad_norm": 2.0038833618164062, - "learning_rate": 4.0641041508277493e-05, - "loss": 0.6595, - "step": 63520 - }, - { - "epoch": 0.5616259127636627, - "grad_norm": 2.8348968029022217, - "learning_rate": 4.063956812060563e-05, - "loss": 0.6184, - "step": 63530 - }, - { - "epoch": 0.5617143160239749, - "grad_norm": 12.131918907165527, - "learning_rate": 4.063809473293375e-05, - "loss": 0.7021, - "step": 63540 - }, - { - "epoch": 0.5618027192842872, - "grad_norm": 8.311182022094727, - "learning_rate": 4.0636621345261885e-05, - "loss": 0.7012, - "step": 63550 - }, - { - "epoch": 0.5618911225445995, - "grad_norm": 5.661182403564453, - "learning_rate": 4.063514795759001e-05, - "loss": 0.7304, - "step": 63560 - }, - { - "epoch": 0.5619795258049117, - "grad_norm": 3.6150553226470947, - "learning_rate": 4.063367456991814e-05, - "loss": 0.6578, - "step": 63570 - }, - { - "epoch": 0.562067929065224, - "grad_norm": 9.561511993408203, - "learning_rate": 4.063220118224627e-05, - "loss": 0.6741, - "step": 63580 - }, - { - "epoch": 0.5621563323255362, - "grad_norm": 7.904484272003174, - "learning_rate": 4.06307277945744e-05, - "loss": 0.7737, - "step": 63590 - }, - { - "epoch": 0.5622447355858484, - "grad_norm": 6.017001628875732, - "learning_rate": 4.062925440690253e-05, - "loss": 0.7488, - "step": 63600 - }, - { - "epoch": 0.5623331388461607, - "grad_norm": 4.022983074188232, - "learning_rate": 4.062778101923066e-05, - "loss": 0.6404, - "step": 63610 - }, - { - "epoch": 0.5624215421064729, - "grad_norm": 2.01179838180542, - "learning_rate": 4.0626307631558784e-05, - "loss": 0.6649, - "step": 63620 - }, - { - "epoch": 0.5625099453667851, - "grad_norm": 1.6342637538909912, - "learning_rate": 4.062483424388692e-05, - "loss": 0.73, - "step": 63630 - }, - { - "epoch": 0.5625983486270973, - "grad_norm": 3.375537395477295, - "learning_rate": 4.062336085621505e-05, - "loss": 0.7265, - "step": 63640 - }, - { - "epoch": 0.5626867518874096, - "grad_norm": 1.0897823572158813, - "learning_rate": 4.0621887468543176e-05, - "loss": 0.6606, - "step": 63650 - }, - { - "epoch": 0.5627751551477218, - "grad_norm": 8.687250137329102, - "learning_rate": 4.0620414080871304e-05, - "loss": 0.72, - "step": 63660 - }, - { - "epoch": 0.562863558408034, - "grad_norm": 7.519293308258057, - "learning_rate": 4.061894069319944e-05, - "loss": 0.7433, - "step": 63670 - }, - { - "epoch": 0.5629519616683464, - "grad_norm": 1.4763296842575073, - "learning_rate": 4.061746730552756e-05, - "loss": 0.5541, - "step": 63680 - }, - { - "epoch": 0.5630403649286586, - "grad_norm": 11.709550857543945, - "learning_rate": 4.0615993917855696e-05, - "loss": 0.7339, - "step": 63690 - }, - { - "epoch": 0.5631287681889708, - "grad_norm": 1.6048792600631714, - "learning_rate": 4.061452053018382e-05, - "loss": 0.5828, - "step": 63700 - }, - { - "epoch": 0.5632171714492831, - "grad_norm": 3.0552163124084473, - "learning_rate": 4.061304714251195e-05, - "loss": 0.7597, - "step": 63710 - }, - { - "epoch": 0.5633055747095953, - "grad_norm": 13.991518020629883, - "learning_rate": 4.061157375484008e-05, - "loss": 0.7509, - "step": 63720 - }, - { - "epoch": 0.5633939779699075, - "grad_norm": 2.3903353214263916, - "learning_rate": 4.061010036716821e-05, - "loss": 0.8877, - "step": 63730 - }, - { - "epoch": 0.5634823812302198, - "grad_norm": 6.726386547088623, - "learning_rate": 4.060862697949634e-05, - "loss": 0.5926, - "step": 63740 - }, - { - "epoch": 0.563570784490532, - "grad_norm": 4.98592472076416, - "learning_rate": 4.060715359182447e-05, - "loss": 0.7932, - "step": 63750 - }, - { - "epoch": 0.5636591877508442, - "grad_norm": 4.511202812194824, - "learning_rate": 4.0605680204152594e-05, - "loss": 0.5619, - "step": 63760 - }, - { - "epoch": 0.5637475910111565, - "grad_norm": 7.471907138824463, - "learning_rate": 4.060420681648073e-05, - "loss": 0.6135, - "step": 63770 - }, - { - "epoch": 0.5638359942714687, - "grad_norm": 15.382930755615234, - "learning_rate": 4.060273342880886e-05, - "loss": 0.7252, - "step": 63780 - }, - { - "epoch": 0.5639243975317809, - "grad_norm": 1.4106415510177612, - "learning_rate": 4.0601260041136986e-05, - "loss": 0.6339, - "step": 63790 - }, - { - "epoch": 0.5640128007920933, - "grad_norm": 3.6871800422668457, - "learning_rate": 4.0599786653465114e-05, - "loss": 0.6325, - "step": 63800 - }, - { - "epoch": 0.5641012040524055, - "grad_norm": 3.1861133575439453, - "learning_rate": 4.059831326579324e-05, - "loss": 0.7985, - "step": 63810 - }, - { - "epoch": 0.5641896073127177, - "grad_norm": 6.78101110458374, - "learning_rate": 4.059683987812137e-05, - "loss": 0.7035, - "step": 63820 - }, - { - "epoch": 0.56427801057303, - "grad_norm": 2.288201093673706, - "learning_rate": 4.0595366490449506e-05, - "loss": 0.6991, - "step": 63830 - }, - { - "epoch": 0.5643664138333422, - "grad_norm": 2.836599826812744, - "learning_rate": 4.059389310277763e-05, - "loss": 0.6553, - "step": 63840 - }, - { - "epoch": 0.5644548170936544, - "grad_norm": 3.3340418338775635, - "learning_rate": 4.059241971510576e-05, - "loss": 0.6236, - "step": 63850 - }, - { - "epoch": 0.5645432203539666, - "grad_norm": 2.1750998497009277, - "learning_rate": 4.059094632743389e-05, - "loss": 0.6451, - "step": 63860 - }, - { - "epoch": 0.5646316236142789, - "grad_norm": 2.46244740486145, - "learning_rate": 4.058947293976202e-05, - "loss": 0.7091, - "step": 63870 - }, - { - "epoch": 0.5647200268745911, - "grad_norm": 3.372239828109741, - "learning_rate": 4.058799955209015e-05, - "loss": 0.7002, - "step": 63880 - }, - { - "epoch": 0.5648084301349033, - "grad_norm": 4.3613362312316895, - "learning_rate": 4.058652616441828e-05, - "loss": 0.5936, - "step": 63890 - }, - { - "epoch": 0.5648968333952156, - "grad_norm": 1.1230272054672241, - "learning_rate": 4.0585052776746405e-05, - "loss": 0.6175, - "step": 63900 - }, - { - "epoch": 0.5649852366555278, - "grad_norm": 1.0587000846862793, - "learning_rate": 4.058357938907454e-05, - "loss": 0.6762, - "step": 63910 - }, - { - "epoch": 0.5650736399158401, - "grad_norm": 2.200409412384033, - "learning_rate": 4.058210600140266e-05, - "loss": 0.8633, - "step": 63920 - }, - { - "epoch": 0.5651620431761524, - "grad_norm": 3.6095399856567383, - "learning_rate": 4.05806326137308e-05, - "loss": 0.7222, - "step": 63930 - }, - { - "epoch": 0.5652504464364646, - "grad_norm": 10.71081829071045, - "learning_rate": 4.0579159226058925e-05, - "loss": 0.7224, - "step": 63940 - }, - { - "epoch": 0.5653388496967768, - "grad_norm": 4.717750072479248, - "learning_rate": 4.057768583838705e-05, - "loss": 0.7204, - "step": 63950 - }, - { - "epoch": 0.5654272529570891, - "grad_norm": 1.6694716215133667, - "learning_rate": 4.057621245071518e-05, - "loss": 0.5246, - "step": 63960 - }, - { - "epoch": 0.5655156562174013, - "grad_norm": 1.9727901220321655, - "learning_rate": 4.057473906304332e-05, - "loss": 0.667, - "step": 63970 - }, - { - "epoch": 0.5656040594777135, - "grad_norm": 2.3130252361297607, - "learning_rate": 4.057326567537144e-05, - "loss": 0.5482, - "step": 63980 - }, - { - "epoch": 0.5656924627380258, - "grad_norm": 5.267376899719238, - "learning_rate": 4.0571792287699574e-05, - "loss": 0.6814, - "step": 63990 - }, - { - "epoch": 0.565780865998338, - "grad_norm": 1.3883095979690552, - "learning_rate": 4.05703189000277e-05, - "loss": 0.5598, - "step": 64000 - }, - { - "epoch": 0.5658692692586502, - "grad_norm": 7.479804039001465, - "learning_rate": 4.056884551235583e-05, - "loss": 0.8481, - "step": 64010 - }, - { - "epoch": 0.5659576725189625, - "grad_norm": 5.832056999206543, - "learning_rate": 4.056737212468396e-05, - "loss": 0.6619, - "step": 64020 - }, - { - "epoch": 0.5660460757792748, - "grad_norm": 4.774280071258545, - "learning_rate": 4.056589873701209e-05, - "loss": 0.6966, - "step": 64030 - }, - { - "epoch": 0.566134479039587, - "grad_norm": 2.387117624282837, - "learning_rate": 4.0564425349340215e-05, - "loss": 0.6996, - "step": 64040 - }, - { - "epoch": 0.5662228822998993, - "grad_norm": 2.4382340908050537, - "learning_rate": 4.056295196166835e-05, - "loss": 0.6519, - "step": 64050 - }, - { - "epoch": 0.5663112855602115, - "grad_norm": 6.238379955291748, - "learning_rate": 4.056147857399647e-05, - "loss": 0.7063, - "step": 64060 - }, - { - "epoch": 0.5663996888205237, - "grad_norm": 3.4802324771881104, - "learning_rate": 4.056000518632461e-05, - "loss": 0.6635, - "step": 64070 - }, - { - "epoch": 0.566488092080836, - "grad_norm": 1.2869523763656616, - "learning_rate": 4.0558531798652736e-05, - "loss": 0.5068, - "step": 64080 - }, - { - "epoch": 0.5665764953411482, - "grad_norm": 2.2430055141448975, - "learning_rate": 4.0557058410980864e-05, - "loss": 0.7223, - "step": 64090 - }, - { - "epoch": 0.5666648986014604, - "grad_norm": 2.5135419368743896, - "learning_rate": 4.055558502330899e-05, - "loss": 0.655, - "step": 64100 - }, - { - "epoch": 0.5667533018617726, - "grad_norm": 6.266421794891357, - "learning_rate": 4.055411163563713e-05, - "loss": 0.7837, - "step": 64110 - }, - { - "epoch": 0.5668417051220849, - "grad_norm": 1.3949779272079468, - "learning_rate": 4.055263824796525e-05, - "loss": 0.6651, - "step": 64120 - }, - { - "epoch": 0.5669301083823971, - "grad_norm": 13.763872146606445, - "learning_rate": 4.0551164860293384e-05, - "loss": 0.6233, - "step": 64130 - }, - { - "epoch": 0.5670185116427093, - "grad_norm": 3.128615379333496, - "learning_rate": 4.054969147262151e-05, - "loss": 0.6717, - "step": 64140 - }, - { - "epoch": 0.5671069149030217, - "grad_norm": 1.4400246143341064, - "learning_rate": 4.054821808494964e-05, - "loss": 0.7256, - "step": 64150 - }, - { - "epoch": 0.5671953181633339, - "grad_norm": 1.9768950939178467, - "learning_rate": 4.054674469727777e-05, - "loss": 0.6783, - "step": 64160 - }, - { - "epoch": 0.5672837214236461, - "grad_norm": 2.8429722785949707, - "learning_rate": 4.05452713096059e-05, - "loss": 0.6701, - "step": 64170 - }, - { - "epoch": 0.5673721246839584, - "grad_norm": 4.377533912658691, - "learning_rate": 4.0543797921934026e-05, - "loss": 0.6689, - "step": 64180 - }, - { - "epoch": 0.5674605279442706, - "grad_norm": 4.206196308135986, - "learning_rate": 4.054232453426216e-05, - "loss": 0.8279, - "step": 64190 - }, - { - "epoch": 0.5675489312045828, - "grad_norm": 5.869049072265625, - "learning_rate": 4.054085114659029e-05, - "loss": 0.7438, - "step": 64200 - }, - { - "epoch": 0.5676373344648951, - "grad_norm": 6.651582717895508, - "learning_rate": 4.053937775891842e-05, - "loss": 0.6973, - "step": 64210 - }, - { - "epoch": 0.5677257377252073, - "grad_norm": 3.322523355484009, - "learning_rate": 4.0537904371246546e-05, - "loss": 0.5148, - "step": 64220 - }, - { - "epoch": 0.5678141409855195, - "grad_norm": 3.9739694595336914, - "learning_rate": 4.0536430983574674e-05, - "loss": 0.7276, - "step": 64230 - }, - { - "epoch": 0.5679025442458318, - "grad_norm": 3.5674448013305664, - "learning_rate": 4.05349575959028e-05, - "loss": 0.7397, - "step": 64240 - }, - { - "epoch": 0.567990947506144, - "grad_norm": 4.198413848876953, - "learning_rate": 4.053348420823094e-05, - "loss": 0.6122, - "step": 64250 - }, - { - "epoch": 0.5680793507664562, - "grad_norm": 1.4144407510757446, - "learning_rate": 4.0532010820559066e-05, - "loss": 0.7536, - "step": 64260 - }, - { - "epoch": 0.5681677540267686, - "grad_norm": 3.42972731590271, - "learning_rate": 4.0530537432887195e-05, - "loss": 0.6837, - "step": 64270 - }, - { - "epoch": 0.5682561572870808, - "grad_norm": 2.9982354640960693, - "learning_rate": 4.052906404521532e-05, - "loss": 0.5659, - "step": 64280 - }, - { - "epoch": 0.568344560547393, - "grad_norm": 6.105149269104004, - "learning_rate": 4.052759065754345e-05, - "loss": 0.8467, - "step": 64290 - }, - { - "epoch": 0.5684329638077052, - "grad_norm": 3.902435779571533, - "learning_rate": 4.052611726987158e-05, - "loss": 0.7616, - "step": 64300 - }, - { - "epoch": 0.5685213670680175, - "grad_norm": 3.6051948070526123, - "learning_rate": 4.052464388219971e-05, - "loss": 0.7347, - "step": 64310 - }, - { - "epoch": 0.5686097703283297, - "grad_norm": 2.8388123512268066, - "learning_rate": 4.052317049452784e-05, - "loss": 0.7009, - "step": 64320 - }, - { - "epoch": 0.5686981735886419, - "grad_norm": 1.8754425048828125, - "learning_rate": 4.052169710685597e-05, - "loss": 0.7271, - "step": 64330 - }, - { - "epoch": 0.5687865768489542, - "grad_norm": 1.294590950012207, - "learning_rate": 4.05202237191841e-05, - "loss": 0.7239, - "step": 64340 - }, - { - "epoch": 0.5688749801092664, - "grad_norm": 6.263574600219727, - "learning_rate": 4.051875033151223e-05, - "loss": 0.6336, - "step": 64350 - }, - { - "epoch": 0.5689633833695786, - "grad_norm": 2.241262674331665, - "learning_rate": 4.0517276943840357e-05, - "loss": 0.6028, - "step": 64360 - }, - { - "epoch": 0.5690517866298909, - "grad_norm": 8.491609573364258, - "learning_rate": 4.0515803556168485e-05, - "loss": 0.7002, - "step": 64370 - }, - { - "epoch": 0.5691401898902031, - "grad_norm": 16.753889083862305, - "learning_rate": 4.051433016849662e-05, - "loss": 0.6694, - "step": 64380 - }, - { - "epoch": 0.5692285931505154, - "grad_norm": 5.131870269775391, - "learning_rate": 4.051285678082474e-05, - "loss": 0.6292, - "step": 64390 - }, - { - "epoch": 0.5693169964108277, - "grad_norm": 1.8675251007080078, - "learning_rate": 4.051138339315288e-05, - "loss": 0.5255, - "step": 64400 - }, - { - "epoch": 0.5694053996711399, - "grad_norm": 5.0697431564331055, - "learning_rate": 4.0509910005481005e-05, - "loss": 0.7475, - "step": 64410 - }, - { - "epoch": 0.5694938029314521, - "grad_norm": 6.377092361450195, - "learning_rate": 4.0508436617809133e-05, - "loss": 0.7615, - "step": 64420 - }, - { - "epoch": 0.5695822061917644, - "grad_norm": 1.8682746887207031, - "learning_rate": 4.050696323013726e-05, - "loss": 0.6611, - "step": 64430 - }, - { - "epoch": 0.5696706094520766, - "grad_norm": 3.793256998062134, - "learning_rate": 4.05054898424654e-05, - "loss": 0.6795, - "step": 64440 - }, - { - "epoch": 0.5697590127123888, - "grad_norm": 8.376814842224121, - "learning_rate": 4.050401645479352e-05, - "loss": 0.675, - "step": 64450 - }, - { - "epoch": 0.569847415972701, - "grad_norm": 4.916607856750488, - "learning_rate": 4.0502543067121654e-05, - "loss": 0.7669, - "step": 64460 - }, - { - "epoch": 0.5699358192330133, - "grad_norm": 1.955283761024475, - "learning_rate": 4.050106967944978e-05, - "loss": 0.6457, - "step": 64470 - }, - { - "epoch": 0.5700242224933255, - "grad_norm": 1.6909441947937012, - "learning_rate": 4.049959629177791e-05, - "loss": 0.5638, - "step": 64480 - }, - { - "epoch": 0.5701126257536377, - "grad_norm": 5.194309234619141, - "learning_rate": 4.049812290410604e-05, - "loss": 0.709, - "step": 64490 - }, - { - "epoch": 0.5702010290139501, - "grad_norm": 1.4350976943969727, - "learning_rate": 4.049664951643417e-05, - "loss": 0.7526, - "step": 64500 - }, - { - "epoch": 0.5702894322742623, - "grad_norm": 1.5561854839324951, - "learning_rate": 4.0495176128762295e-05, - "loss": 0.5437, - "step": 64510 - }, - { - "epoch": 0.5703778355345746, - "grad_norm": 6.050344944000244, - "learning_rate": 4.049370274109043e-05, - "loss": 0.6692, - "step": 64520 - }, - { - "epoch": 0.5704662387948868, - "grad_norm": 2.152381658554077, - "learning_rate": 4.049222935341855e-05, - "loss": 0.4805, - "step": 64530 - }, - { - "epoch": 0.570554642055199, - "grad_norm": 9.078564643859863, - "learning_rate": 4.049075596574669e-05, - "loss": 0.7142, - "step": 64540 - }, - { - "epoch": 0.5706430453155112, - "grad_norm": 3.8883821964263916, - "learning_rate": 4.0489282578074816e-05, - "loss": 0.7341, - "step": 64550 - }, - { - "epoch": 0.5707314485758235, - "grad_norm": 4.619812965393066, - "learning_rate": 4.0487809190402944e-05, - "loss": 0.6385, - "step": 64560 - }, - { - "epoch": 0.5708198518361357, - "grad_norm": 3.550454616546631, - "learning_rate": 4.048633580273107e-05, - "loss": 0.7299, - "step": 64570 - }, - { - "epoch": 0.5709082550964479, - "grad_norm": 10.33322525024414, - "learning_rate": 4.048486241505921e-05, - "loss": 0.7653, - "step": 64580 - }, - { - "epoch": 0.5709966583567602, - "grad_norm": 11.441985130310059, - "learning_rate": 4.048338902738733e-05, - "loss": 0.7106, - "step": 64590 - }, - { - "epoch": 0.5710850616170724, - "grad_norm": 6.242215156555176, - "learning_rate": 4.0481915639715464e-05, - "loss": 0.7193, - "step": 64600 - }, - { - "epoch": 0.5711734648773846, - "grad_norm": 15.218245506286621, - "learning_rate": 4.048044225204359e-05, - "loss": 0.7969, - "step": 64610 - }, - { - "epoch": 0.571261868137697, - "grad_norm": 2.025117874145508, - "learning_rate": 4.047896886437172e-05, - "loss": 0.6353, - "step": 64620 - }, - { - "epoch": 0.5713502713980092, - "grad_norm": 10.602964401245117, - "learning_rate": 4.047749547669985e-05, - "loss": 0.9693, - "step": 64630 - }, - { - "epoch": 0.5714386746583214, - "grad_norm": 2.2587060928344727, - "learning_rate": 4.047602208902798e-05, - "loss": 0.726, - "step": 64640 - }, - { - "epoch": 0.5715270779186337, - "grad_norm": 3.2599239349365234, - "learning_rate": 4.0474548701356106e-05, - "loss": 0.8149, - "step": 64650 - }, - { - "epoch": 0.5716154811789459, - "grad_norm": 4.679561138153076, - "learning_rate": 4.047307531368424e-05, - "loss": 0.7313, - "step": 64660 - }, - { - "epoch": 0.5717038844392581, - "grad_norm": 8.353294372558594, - "learning_rate": 4.047160192601236e-05, - "loss": 0.6951, - "step": 64670 - }, - { - "epoch": 0.5717922876995704, - "grad_norm": 2.8976051807403564, - "learning_rate": 4.04701285383405e-05, - "loss": 0.6465, - "step": 64680 - }, - { - "epoch": 0.5718806909598826, - "grad_norm": 3.4874932765960693, - "learning_rate": 4.0468655150668626e-05, - "loss": 0.6176, - "step": 64690 - }, - { - "epoch": 0.5719690942201948, - "grad_norm": 4.699406623840332, - "learning_rate": 4.0467181762996754e-05, - "loss": 0.66, - "step": 64700 - }, - { - "epoch": 0.572057497480507, - "grad_norm": 2.328927755355835, - "learning_rate": 4.046570837532488e-05, - "loss": 0.7214, - "step": 64710 - }, - { - "epoch": 0.5721459007408193, - "grad_norm": 1.8024733066558838, - "learning_rate": 4.046423498765302e-05, - "loss": 0.5946, - "step": 64720 - }, - { - "epoch": 0.5722343040011315, - "grad_norm": 5.493916034698486, - "learning_rate": 4.046276159998114e-05, - "loss": 0.6911, - "step": 64730 - }, - { - "epoch": 0.5723227072614439, - "grad_norm": 3.101094961166382, - "learning_rate": 4.0461288212309275e-05, - "loss": 0.7828, - "step": 64740 - }, - { - "epoch": 0.5724111105217561, - "grad_norm": 13.01898193359375, - "learning_rate": 4.0459814824637396e-05, - "loss": 0.7767, - "step": 64750 - }, - { - "epoch": 0.5724995137820683, - "grad_norm": 4.371884346008301, - "learning_rate": 4.045834143696553e-05, - "loss": 0.6418, - "step": 64760 - }, - { - "epoch": 0.5725879170423805, - "grad_norm": 1.7412854433059692, - "learning_rate": 4.045686804929366e-05, - "loss": 0.6645, - "step": 64770 - }, - { - "epoch": 0.5726763203026928, - "grad_norm": 3.0693604946136475, - "learning_rate": 4.045539466162179e-05, - "loss": 0.5786, - "step": 64780 - }, - { - "epoch": 0.572764723563005, - "grad_norm": 1.506795883178711, - "learning_rate": 4.0453921273949916e-05, - "loss": 0.6994, - "step": 64790 - }, - { - "epoch": 0.5728531268233172, - "grad_norm": 4.32845401763916, - "learning_rate": 4.045244788627805e-05, - "loss": 0.6652, - "step": 64800 - }, - { - "epoch": 0.5729415300836295, - "grad_norm": 5.896068572998047, - "learning_rate": 4.045097449860617e-05, - "loss": 0.6759, - "step": 64810 - }, - { - "epoch": 0.5730299333439417, - "grad_norm": 8.06838607788086, - "learning_rate": 4.044950111093431e-05, - "loss": 0.7163, - "step": 64820 - }, - { - "epoch": 0.5731183366042539, - "grad_norm": 3.362428665161133, - "learning_rate": 4.0448027723262437e-05, - "loss": 0.8562, - "step": 64830 - }, - { - "epoch": 0.5732067398645662, - "grad_norm": 3.129589319229126, - "learning_rate": 4.0446554335590565e-05, - "loss": 0.4917, - "step": 64840 - }, - { - "epoch": 0.5732951431248784, - "grad_norm": 8.218469619750977, - "learning_rate": 4.044508094791869e-05, - "loss": 0.6536, - "step": 64850 - }, - { - "epoch": 0.5733835463851907, - "grad_norm": 2.535635232925415, - "learning_rate": 4.044360756024682e-05, - "loss": 0.7227, - "step": 64860 - }, - { - "epoch": 0.573471949645503, - "grad_norm": 7.734989643096924, - "learning_rate": 4.044213417257495e-05, - "loss": 0.7018, - "step": 64870 - }, - { - "epoch": 0.5735603529058152, - "grad_norm": 2.6127724647521973, - "learning_rate": 4.0440660784903085e-05, - "loss": 0.7118, - "step": 64880 - }, - { - "epoch": 0.5736487561661274, - "grad_norm": 3.806565761566162, - "learning_rate": 4.043918739723121e-05, - "loss": 0.649, - "step": 64890 - }, - { - "epoch": 0.5737371594264397, - "grad_norm": 4.902356147766113, - "learning_rate": 4.043771400955934e-05, - "loss": 0.6632, - "step": 64900 - }, - { - "epoch": 0.5738255626867519, - "grad_norm": 1.861557960510254, - "learning_rate": 4.043624062188747e-05, - "loss": 0.6707, - "step": 64910 - }, - { - "epoch": 0.5739139659470641, - "grad_norm": 11.613804817199707, - "learning_rate": 4.04347672342156e-05, - "loss": 0.6406, - "step": 64920 - }, - { - "epoch": 0.5740023692073764, - "grad_norm": 1.9614821672439575, - "learning_rate": 4.043329384654373e-05, - "loss": 0.7516, - "step": 64930 - }, - { - "epoch": 0.5740907724676886, - "grad_norm": 1.5052820444107056, - "learning_rate": 4.043182045887186e-05, - "loss": 0.6, - "step": 64940 - }, - { - "epoch": 0.5741791757280008, - "grad_norm": 3.503469467163086, - "learning_rate": 4.0430347071199984e-05, - "loss": 0.7295, - "step": 64950 - }, - { - "epoch": 0.574267578988313, - "grad_norm": 1.8578046560287476, - "learning_rate": 4.042887368352812e-05, - "loss": 0.6088, - "step": 64960 - }, - { - "epoch": 0.5743559822486253, - "grad_norm": 7.115762233734131, - "learning_rate": 4.042740029585625e-05, - "loss": 0.6956, - "step": 64970 - }, - { - "epoch": 0.5744443855089376, - "grad_norm": 3.296905994415283, - "learning_rate": 4.0425926908184375e-05, - "loss": 0.5472, - "step": 64980 - }, - { - "epoch": 0.5745327887692498, - "grad_norm": 4.446994781494141, - "learning_rate": 4.0424453520512504e-05, - "loss": 0.6906, - "step": 64990 - }, - { - "epoch": 0.5746211920295621, - "grad_norm": 2.829014539718628, - "learning_rate": 4.042298013284063e-05, - "loss": 0.7285, - "step": 65000 - }, - { - "epoch": 0.5747095952898743, - "grad_norm": 13.407563209533691, - "learning_rate": 4.042150674516876e-05, - "loss": 0.6594, - "step": 65010 - }, - { - "epoch": 0.5747979985501865, - "grad_norm": 5.668613910675049, - "learning_rate": 4.0420033357496896e-05, - "loss": 0.7137, - "step": 65020 - }, - { - "epoch": 0.5748864018104988, - "grad_norm": 2.314248561859131, - "learning_rate": 4.041855996982502e-05, - "loss": 0.5666, - "step": 65030 - }, - { - "epoch": 0.574974805070811, - "grad_norm": 2.1190857887268066, - "learning_rate": 4.041708658215315e-05, - "loss": 0.7266, - "step": 65040 - }, - { - "epoch": 0.5750632083311232, - "grad_norm": 5.008220672607422, - "learning_rate": 4.041561319448128e-05, - "loss": 0.7779, - "step": 65050 - }, - { - "epoch": 0.5751516115914355, - "grad_norm": 2.3553032875061035, - "learning_rate": 4.041413980680941e-05, - "loss": 0.734, - "step": 65060 - }, - { - "epoch": 0.5752400148517477, - "grad_norm": 2.6983978748321533, - "learning_rate": 4.041266641913754e-05, - "loss": 0.7602, - "step": 65070 - }, - { - "epoch": 0.5753284181120599, - "grad_norm": 5.724380016326904, - "learning_rate": 4.041119303146567e-05, - "loss": 0.6551, - "step": 65080 - }, - { - "epoch": 0.5754168213723723, - "grad_norm": 1.12800931930542, - "learning_rate": 4.0409719643793794e-05, - "loss": 0.663, - "step": 65090 - }, - { - "epoch": 0.5755052246326845, - "grad_norm": 12.098381996154785, - "learning_rate": 4.040824625612193e-05, - "loss": 0.6348, - "step": 65100 - }, - { - "epoch": 0.5755936278929967, - "grad_norm": 1.838953971862793, - "learning_rate": 4.040677286845006e-05, - "loss": 0.8018, - "step": 65110 - }, - { - "epoch": 0.575682031153309, - "grad_norm": 8.700167655944824, - "learning_rate": 4.0405299480778186e-05, - "loss": 0.6953, - "step": 65120 - }, - { - "epoch": 0.5757704344136212, - "grad_norm": 2.6799418926239014, - "learning_rate": 4.0403826093106314e-05, - "loss": 0.6421, - "step": 65130 - }, - { - "epoch": 0.5758588376739334, - "grad_norm": 2.222330331802368, - "learning_rate": 4.040235270543444e-05, - "loss": 0.731, - "step": 65140 - }, - { - "epoch": 0.5759472409342457, - "grad_norm": 6.967453479766846, - "learning_rate": 4.040087931776257e-05, - "loss": 0.607, - "step": 65150 - }, - { - "epoch": 0.5760356441945579, - "grad_norm": 3.4749906063079834, - "learning_rate": 4.0399405930090706e-05, - "loss": 0.4454, - "step": 65160 - }, - { - "epoch": 0.5761240474548701, - "grad_norm": 1.130460262298584, - "learning_rate": 4.0397932542418835e-05, - "loss": 0.6654, - "step": 65170 - }, - { - "epoch": 0.5762124507151823, - "grad_norm": 7.090402126312256, - "learning_rate": 4.039645915474696e-05, - "loss": 0.6064, - "step": 65180 - }, - { - "epoch": 0.5763008539754946, - "grad_norm": 8.348467826843262, - "learning_rate": 4.039498576707509e-05, - "loss": 0.7333, - "step": 65190 - }, - { - "epoch": 0.5763892572358068, - "grad_norm": 2.160935163497925, - "learning_rate": 4.039351237940322e-05, - "loss": 0.5871, - "step": 65200 - }, - { - "epoch": 0.5764776604961191, - "grad_norm": 2.075876474380493, - "learning_rate": 4.039203899173135e-05, - "loss": 0.6993, - "step": 65210 - }, - { - "epoch": 0.5765660637564314, - "grad_norm": 3.337186098098755, - "learning_rate": 4.0390565604059476e-05, - "loss": 0.6739, - "step": 65220 - }, - { - "epoch": 0.5766544670167436, - "grad_norm": 3.3875410556793213, - "learning_rate": 4.038909221638761e-05, - "loss": 0.6239, - "step": 65230 - }, - { - "epoch": 0.5767428702770558, - "grad_norm": 2.0924062728881836, - "learning_rate": 4.038761882871574e-05, - "loss": 0.5542, - "step": 65240 - }, - { - "epoch": 0.5768312735373681, - "grad_norm": 1.5167820453643799, - "learning_rate": 4.038614544104387e-05, - "loss": 0.7349, - "step": 65250 - }, - { - "epoch": 0.5769196767976803, - "grad_norm": 3.1909453868865967, - "learning_rate": 4.0384672053371996e-05, - "loss": 0.745, - "step": 65260 - }, - { - "epoch": 0.5770080800579925, - "grad_norm": 3.2624502182006836, - "learning_rate": 4.0383198665700125e-05, - "loss": 0.6994, - "step": 65270 - }, - { - "epoch": 0.5770964833183048, - "grad_norm": 3.371648073196411, - "learning_rate": 4.038172527802825e-05, - "loss": 0.6332, - "step": 65280 - }, - { - "epoch": 0.577184886578617, - "grad_norm": 5.004847049713135, - "learning_rate": 4.038025189035639e-05, - "loss": 0.6358, - "step": 65290 - }, - { - "epoch": 0.5772732898389292, - "grad_norm": 8.004755973815918, - "learning_rate": 4.037877850268452e-05, - "loss": 0.6212, - "step": 65300 - }, - { - "epoch": 0.5773616930992415, - "grad_norm": 4.12190580368042, - "learning_rate": 4.0377305115012645e-05, - "loss": 0.7918, - "step": 65310 - }, - { - "epoch": 0.5774500963595537, - "grad_norm": 4.382535457611084, - "learning_rate": 4.037583172734077e-05, - "loss": 0.6414, - "step": 65320 - }, - { - "epoch": 0.577538499619866, - "grad_norm": 2.492431640625, - "learning_rate": 4.03743583396689e-05, - "loss": 0.6578, - "step": 65330 - }, - { - "epoch": 0.5776269028801783, - "grad_norm": 10.768872261047363, - "learning_rate": 4.037288495199703e-05, - "loss": 0.6652, - "step": 65340 - }, - { - "epoch": 0.5777153061404905, - "grad_norm": 2.8188974857330322, - "learning_rate": 4.0371411564325165e-05, - "loss": 0.5491, - "step": 65350 - }, - { - "epoch": 0.5778037094008027, - "grad_norm": 31.166112899780273, - "learning_rate": 4.036993817665329e-05, - "loss": 0.7656, - "step": 65360 - }, - { - "epoch": 0.577892112661115, - "grad_norm": 3.75449538230896, - "learning_rate": 4.036846478898142e-05, - "loss": 0.575, - "step": 65370 - }, - { - "epoch": 0.5779805159214272, - "grad_norm": 4.671249866485596, - "learning_rate": 4.036699140130955e-05, - "loss": 0.6896, - "step": 65380 - }, - { - "epoch": 0.5780689191817394, - "grad_norm": 12.375015258789062, - "learning_rate": 4.036551801363768e-05, - "loss": 0.6865, - "step": 65390 - }, - { - "epoch": 0.5781573224420516, - "grad_norm": 3.042642831802368, - "learning_rate": 4.036404462596581e-05, - "loss": 0.5746, - "step": 65400 - }, - { - "epoch": 0.5782457257023639, - "grad_norm": 4.171643257141113, - "learning_rate": 4.036257123829394e-05, - "loss": 0.6592, - "step": 65410 - }, - { - "epoch": 0.5783341289626761, - "grad_norm": 2.997007131576538, - "learning_rate": 4.0361097850622064e-05, - "loss": 0.7502, - "step": 65420 - }, - { - "epoch": 0.5784225322229883, - "grad_norm": 5.75356388092041, - "learning_rate": 4.03596244629502e-05, - "loss": 0.6929, - "step": 65430 - }, - { - "epoch": 0.5785109354833006, - "grad_norm": 2.511322021484375, - "learning_rate": 4.035815107527833e-05, - "loss": 0.6957, - "step": 65440 - }, - { - "epoch": 0.5785993387436129, - "grad_norm": 1.8392339944839478, - "learning_rate": 4.0356677687606456e-05, - "loss": 0.6734, - "step": 65450 - }, - { - "epoch": 0.5786877420039251, - "grad_norm": 5.3239665031433105, - "learning_rate": 4.0355204299934584e-05, - "loss": 0.7657, - "step": 65460 - }, - { - "epoch": 0.5787761452642374, - "grad_norm": 4.524316787719727, - "learning_rate": 4.035373091226271e-05, - "loss": 0.6809, - "step": 65470 - }, - { - "epoch": 0.5788645485245496, - "grad_norm": 1.9566243886947632, - "learning_rate": 4.035225752459084e-05, - "loss": 0.6773, - "step": 65480 - }, - { - "epoch": 0.5789529517848618, - "grad_norm": 7.495596885681152, - "learning_rate": 4.0350784136918976e-05, - "loss": 0.7635, - "step": 65490 - }, - { - "epoch": 0.5790413550451741, - "grad_norm": 1.264648675918579, - "learning_rate": 4.03493107492471e-05, - "loss": 0.689, - "step": 65500 - }, - { - "epoch": 0.5791297583054863, - "grad_norm": 2.051647424697876, - "learning_rate": 4.034783736157523e-05, - "loss": 0.5404, - "step": 65510 - }, - { - "epoch": 0.5792181615657985, - "grad_norm": 3.8799514770507812, - "learning_rate": 4.034636397390336e-05, - "loss": 0.6113, - "step": 65520 - }, - { - "epoch": 0.5793065648261108, - "grad_norm": 2.673724412918091, - "learning_rate": 4.034489058623149e-05, - "loss": 0.6334, - "step": 65530 - }, - { - "epoch": 0.579394968086423, - "grad_norm": 2.2784740924835205, - "learning_rate": 4.034341719855962e-05, - "loss": 0.6283, - "step": 65540 - }, - { - "epoch": 0.5794833713467352, - "grad_norm": 4.802986145019531, - "learning_rate": 4.034194381088775e-05, - "loss": 0.7282, - "step": 65550 - }, - { - "epoch": 0.5795717746070476, - "grad_norm": 16.573030471801758, - "learning_rate": 4.0340470423215874e-05, - "loss": 0.6995, - "step": 65560 - }, - { - "epoch": 0.5796601778673598, - "grad_norm": 5.87691593170166, - "learning_rate": 4.033899703554401e-05, - "loss": 0.778, - "step": 65570 - }, - { - "epoch": 0.579748581127672, - "grad_norm": 3.7554335594177246, - "learning_rate": 4.033752364787213e-05, - "loss": 0.6854, - "step": 65580 - }, - { - "epoch": 0.5798369843879843, - "grad_norm": 6.366271018981934, - "learning_rate": 4.0336050260200266e-05, - "loss": 0.8265, - "step": 65590 - }, - { - "epoch": 0.5799253876482965, - "grad_norm": 8.081609725952148, - "learning_rate": 4.0334576872528394e-05, - "loss": 0.7417, - "step": 65600 - }, - { - "epoch": 0.5800137909086087, - "grad_norm": 1.2006568908691406, - "learning_rate": 4.033310348485652e-05, - "loss": 0.6332, - "step": 65610 - }, - { - "epoch": 0.580102194168921, - "grad_norm": 15.604028701782227, - "learning_rate": 4.033163009718465e-05, - "loss": 0.7658, - "step": 65620 - }, - { - "epoch": 0.5801905974292332, - "grad_norm": 4.313818454742432, - "learning_rate": 4.0330156709512786e-05, - "loss": 0.6091, - "step": 65630 - }, - { - "epoch": 0.5802790006895454, - "grad_norm": 5.838403701782227, - "learning_rate": 4.032868332184091e-05, - "loss": 0.5807, - "step": 65640 - }, - { - "epoch": 0.5803674039498576, - "grad_norm": 7.906291484832764, - "learning_rate": 4.032720993416904e-05, - "loss": 0.7414, - "step": 65650 - }, - { - "epoch": 0.5804558072101699, - "grad_norm": 3.6370160579681396, - "learning_rate": 4.032573654649717e-05, - "loss": 0.673, - "step": 65660 - }, - { - "epoch": 0.5805442104704821, - "grad_norm": 1.5348896980285645, - "learning_rate": 4.03242631588253e-05, - "loss": 0.6197, - "step": 65670 - }, - { - "epoch": 0.5806326137307944, - "grad_norm": 3.536381959915161, - "learning_rate": 4.032278977115343e-05, - "loss": 0.6769, - "step": 65680 - }, - { - "epoch": 0.5807210169911067, - "grad_norm": 5.558623313903809, - "learning_rate": 4.0321316383481556e-05, - "loss": 0.64, - "step": 65690 - }, - { - "epoch": 0.5808094202514189, - "grad_norm": 1.8969066143035889, - "learning_rate": 4.0319842995809685e-05, - "loss": 0.554, - "step": 65700 - }, - { - "epoch": 0.5808978235117311, - "grad_norm": 2.612123727798462, - "learning_rate": 4.031836960813782e-05, - "loss": 0.6549, - "step": 65710 - }, - { - "epoch": 0.5809862267720434, - "grad_norm": 4.810793876647949, - "learning_rate": 4.031689622046594e-05, - "loss": 0.8441, - "step": 65720 - }, - { - "epoch": 0.5810746300323556, - "grad_norm": 2.560311794281006, - "learning_rate": 4.0315422832794077e-05, - "loss": 0.593, - "step": 65730 - }, - { - "epoch": 0.5811630332926678, - "grad_norm": 2.641464948654175, - "learning_rate": 4.0313949445122205e-05, - "loss": 0.6973, - "step": 65740 - }, - { - "epoch": 0.5812514365529801, - "grad_norm": 1.3652746677398682, - "learning_rate": 4.031247605745033e-05, - "loss": 0.6517, - "step": 65750 - }, - { - "epoch": 0.5813398398132923, - "grad_norm": 4.5376811027526855, - "learning_rate": 4.031100266977846e-05, - "loss": 0.5679, - "step": 65760 - }, - { - "epoch": 0.5814282430736045, - "grad_norm": 1.8268612623214722, - "learning_rate": 4.03095292821066e-05, - "loss": 0.7254, - "step": 65770 - }, - { - "epoch": 0.5815166463339168, - "grad_norm": 6.924813747406006, - "learning_rate": 4.030805589443472e-05, - "loss": 0.6783, - "step": 65780 - }, - { - "epoch": 0.581605049594229, - "grad_norm": 10.423727989196777, - "learning_rate": 4.0306582506762853e-05, - "loss": 0.75, - "step": 65790 - }, - { - "epoch": 0.5816934528545413, - "grad_norm": 4.0514631271362305, - "learning_rate": 4.0305109119090975e-05, - "loss": 0.6846, - "step": 65800 - }, - { - "epoch": 0.5817818561148536, - "grad_norm": 17.880250930786133, - "learning_rate": 4.030363573141911e-05, - "loss": 0.7167, - "step": 65810 - }, - { - "epoch": 0.5818702593751658, - "grad_norm": 3.270742177963257, - "learning_rate": 4.030216234374724e-05, - "loss": 0.6176, - "step": 65820 - }, - { - "epoch": 0.581958662635478, - "grad_norm": 4.598825931549072, - "learning_rate": 4.030068895607537e-05, - "loss": 0.7393, - "step": 65830 - }, - { - "epoch": 0.5820470658957902, - "grad_norm": 3.6737895011901855, - "learning_rate": 4.0299215568403495e-05, - "loss": 0.6977, - "step": 65840 - }, - { - "epoch": 0.5821354691561025, - "grad_norm": 7.075663089752197, - "learning_rate": 4.029774218073163e-05, - "loss": 0.6143, - "step": 65850 - }, - { - "epoch": 0.5822238724164147, - "grad_norm": 3.0455093383789062, - "learning_rate": 4.029626879305975e-05, - "loss": 0.7357, - "step": 65860 - }, - { - "epoch": 0.5823122756767269, - "grad_norm": 2.9687275886535645, - "learning_rate": 4.029479540538789e-05, - "loss": 0.6443, - "step": 65870 - }, - { - "epoch": 0.5824006789370392, - "grad_norm": 6.235419273376465, - "learning_rate": 4.0293322017716015e-05, - "loss": 0.767, - "step": 65880 - }, - { - "epoch": 0.5824890821973514, - "grad_norm": 8.538867950439453, - "learning_rate": 4.0291848630044144e-05, - "loss": 0.6702, - "step": 65890 - }, - { - "epoch": 0.5825774854576636, - "grad_norm": 3.7236874103546143, - "learning_rate": 4.029037524237227e-05, - "loss": 0.8373, - "step": 65900 - }, - { - "epoch": 0.5826658887179759, - "grad_norm": 3.416780948638916, - "learning_rate": 4.028890185470041e-05, - "loss": 0.6486, - "step": 65910 - }, - { - "epoch": 0.5827542919782882, - "grad_norm": 2.2044196128845215, - "learning_rate": 4.028742846702853e-05, - "loss": 0.5961, - "step": 65920 - }, - { - "epoch": 0.5828426952386004, - "grad_norm": 1.9446840286254883, - "learning_rate": 4.0285955079356664e-05, - "loss": 0.6601, - "step": 65930 - }, - { - "epoch": 0.5829310984989127, - "grad_norm": 1.5652238130569458, - "learning_rate": 4.0284481691684786e-05, - "loss": 0.6993, - "step": 65940 - }, - { - "epoch": 0.5830195017592249, - "grad_norm": 1.8059438467025757, - "learning_rate": 4.028300830401292e-05, - "loss": 0.7818, - "step": 65950 - }, - { - "epoch": 0.5831079050195371, - "grad_norm": 2.366891860961914, - "learning_rate": 4.028153491634105e-05, - "loss": 0.6826, - "step": 65960 - }, - { - "epoch": 0.5831963082798494, - "grad_norm": 5.274660110473633, - "learning_rate": 4.028006152866918e-05, - "loss": 0.7629, - "step": 65970 - }, - { - "epoch": 0.5832847115401616, - "grad_norm": 6.935770511627197, - "learning_rate": 4.0278588140997306e-05, - "loss": 0.7826, - "step": 65980 - }, - { - "epoch": 0.5833731148004738, - "grad_norm": 3.2195160388946533, - "learning_rate": 4.027711475332544e-05, - "loss": 0.7378, - "step": 65990 - }, - { - "epoch": 0.583461518060786, - "grad_norm": 3.6363794803619385, - "learning_rate": 4.027564136565357e-05, - "loss": 0.6644, - "step": 66000 - }, - { - "epoch": 0.5835499213210983, - "grad_norm": 4.602499961853027, - "learning_rate": 4.02741679779817e-05, - "loss": 0.6812, - "step": 66010 - }, - { - "epoch": 0.5836383245814105, - "grad_norm": 12.960406303405762, - "learning_rate": 4.0272694590309826e-05, - "loss": 0.5673, - "step": 66020 - }, - { - "epoch": 0.5837267278417227, - "grad_norm": 3.3600995540618896, - "learning_rate": 4.0271221202637954e-05, - "loss": 0.6716, - "step": 66030 - }, - { - "epoch": 0.5838151311020351, - "grad_norm": 1.669277310371399, - "learning_rate": 4.026974781496608e-05, - "loss": 0.5967, - "step": 66040 - }, - { - "epoch": 0.5839035343623473, - "grad_norm": 1.8807207345962524, - "learning_rate": 4.026827442729421e-05, - "loss": 0.6004, - "step": 66050 - }, - { - "epoch": 0.5839919376226596, - "grad_norm": 4.403038024902344, - "learning_rate": 4.0266801039622346e-05, - "loss": 0.6126, - "step": 66060 - }, - { - "epoch": 0.5840803408829718, - "grad_norm": 1.1700714826583862, - "learning_rate": 4.0265327651950474e-05, - "loss": 0.7059, - "step": 66070 - }, - { - "epoch": 0.584168744143284, - "grad_norm": 4.6222662925720215, - "learning_rate": 4.02638542642786e-05, - "loss": 0.6469, - "step": 66080 - }, - { - "epoch": 0.5842571474035962, - "grad_norm": 1.3058925867080688, - "learning_rate": 4.026238087660673e-05, - "loss": 0.8034, - "step": 66090 - }, - { - "epoch": 0.5843455506639085, - "grad_norm": 4.285163879394531, - "learning_rate": 4.026090748893486e-05, - "loss": 0.6897, - "step": 66100 - }, - { - "epoch": 0.5844339539242207, - "grad_norm": 1.457245111465454, - "learning_rate": 4.025943410126299e-05, - "loss": 0.7481, - "step": 66110 - }, - { - "epoch": 0.5845223571845329, - "grad_norm": 11.897272109985352, - "learning_rate": 4.025796071359112e-05, - "loss": 0.754, - "step": 66120 - }, - { - "epoch": 0.5846107604448452, - "grad_norm": 4.249215126037598, - "learning_rate": 4.025648732591925e-05, - "loss": 0.7145, - "step": 66130 - }, - { - "epoch": 0.5846991637051574, - "grad_norm": 7.60205078125, - "learning_rate": 4.025501393824738e-05, - "loss": 0.7374, - "step": 66140 - }, - { - "epoch": 0.5847875669654697, - "grad_norm": 3.8163368701934814, - "learning_rate": 4.025354055057551e-05, - "loss": 0.8132, - "step": 66150 - }, - { - "epoch": 0.584875970225782, - "grad_norm": 3.2086496353149414, - "learning_rate": 4.0252067162903636e-05, - "loss": 0.7485, - "step": 66160 - }, - { - "epoch": 0.5849643734860942, - "grad_norm": 2.357443332672119, - "learning_rate": 4.0250593775231765e-05, - "loss": 0.5496, - "step": 66170 - }, - { - "epoch": 0.5850527767464064, - "grad_norm": 9.534817695617676, - "learning_rate": 4.02491203875599e-05, - "loss": 0.6263, - "step": 66180 - }, - { - "epoch": 0.5851411800067187, - "grad_norm": 6.535423755645752, - "learning_rate": 4.024764699988802e-05, - "loss": 0.7151, - "step": 66190 - }, - { - "epoch": 0.5852295832670309, - "grad_norm": 4.886234760284424, - "learning_rate": 4.024617361221616e-05, - "loss": 0.6801, - "step": 66200 - }, - { - "epoch": 0.5853179865273431, - "grad_norm": 1.1482465267181396, - "learning_rate": 4.0244700224544285e-05, - "loss": 0.6077, - "step": 66210 - }, - { - "epoch": 0.5854063897876554, - "grad_norm": 7.958798408508301, - "learning_rate": 4.024322683687241e-05, - "loss": 0.6062, - "step": 66220 - }, - { - "epoch": 0.5854947930479676, - "grad_norm": 5.445904731750488, - "learning_rate": 4.024175344920054e-05, - "loss": 0.7115, - "step": 66230 - }, - { - "epoch": 0.5855831963082798, - "grad_norm": 5.273890972137451, - "learning_rate": 4.024028006152868e-05, - "loss": 0.5686, - "step": 66240 - }, - { - "epoch": 0.585671599568592, - "grad_norm": 1.4359538555145264, - "learning_rate": 4.02388066738568e-05, - "loss": 0.7021, - "step": 66250 - }, - { - "epoch": 0.5857600028289043, - "grad_norm": 3.0926342010498047, - "learning_rate": 4.0237333286184934e-05, - "loss": 0.6771, - "step": 66260 - }, - { - "epoch": 0.5858484060892166, - "grad_norm": 1.8125255107879639, - "learning_rate": 4.0235859898513055e-05, - "loss": 0.7517, - "step": 66270 - }, - { - "epoch": 0.5859368093495289, - "grad_norm": 7.297733783721924, - "learning_rate": 4.023438651084119e-05, - "loss": 0.7357, - "step": 66280 - }, - { - "epoch": 0.5860252126098411, - "grad_norm": 2.663259267807007, - "learning_rate": 4.023291312316932e-05, - "loss": 0.5976, - "step": 66290 - }, - { - "epoch": 0.5861136158701533, - "grad_norm": 7.248925685882568, - "learning_rate": 4.023143973549745e-05, - "loss": 0.6108, - "step": 66300 - }, - { - "epoch": 0.5862020191304655, - "grad_norm": 5.889019966125488, - "learning_rate": 4.0229966347825575e-05, - "loss": 0.671, - "step": 66310 - }, - { - "epoch": 0.5862904223907778, - "grad_norm": 6.2864990234375, - "learning_rate": 4.022849296015371e-05, - "loss": 0.7183, - "step": 66320 - }, - { - "epoch": 0.58637882565109, - "grad_norm": 5.465065002441406, - "learning_rate": 4.022701957248183e-05, - "loss": 0.7012, - "step": 66330 - }, - { - "epoch": 0.5864672289114022, - "grad_norm": 4.232016563415527, - "learning_rate": 4.022554618480997e-05, - "loss": 0.6973, - "step": 66340 - }, - { - "epoch": 0.5865556321717145, - "grad_norm": 4.214752674102783, - "learning_rate": 4.0224072797138095e-05, - "loss": 0.8033, - "step": 66350 - }, - { - "epoch": 0.5866440354320267, - "grad_norm": 6.341785430908203, - "learning_rate": 4.0222599409466224e-05, - "loss": 0.6417, - "step": 66360 - }, - { - "epoch": 0.5867324386923389, - "grad_norm": 1.4448235034942627, - "learning_rate": 4.022112602179435e-05, - "loss": 0.6446, - "step": 66370 - }, - { - "epoch": 0.5868208419526512, - "grad_norm": 4.314273834228516, - "learning_rate": 4.021965263412249e-05, - "loss": 0.7746, - "step": 66380 - }, - { - "epoch": 0.5869092452129635, - "grad_norm": 2.4736480712890625, - "learning_rate": 4.021817924645061e-05, - "loss": 0.5755, - "step": 66390 - }, - { - "epoch": 0.5869976484732757, - "grad_norm": 3.5737013816833496, - "learning_rate": 4.0216705858778744e-05, - "loss": 0.7399, - "step": 66400 - }, - { - "epoch": 0.587086051733588, - "grad_norm": 7.623898029327393, - "learning_rate": 4.0215232471106866e-05, - "loss": 0.7194, - "step": 66410 - }, - { - "epoch": 0.5871744549939002, - "grad_norm": 2.8788838386535645, - "learning_rate": 4.0213759083435e-05, - "loss": 0.6291, - "step": 66420 - }, - { - "epoch": 0.5872628582542124, - "grad_norm": 4.522538185119629, - "learning_rate": 4.021228569576313e-05, - "loss": 0.6819, - "step": 66430 - }, - { - "epoch": 0.5873512615145247, - "grad_norm": 2.913949966430664, - "learning_rate": 4.021081230809126e-05, - "loss": 0.784, - "step": 66440 - }, - { - "epoch": 0.5874396647748369, - "grad_norm": 11.622467994689941, - "learning_rate": 4.0209338920419386e-05, - "loss": 0.663, - "step": 66450 - }, - { - "epoch": 0.5875280680351491, - "grad_norm": 1.5301315784454346, - "learning_rate": 4.020786553274752e-05, - "loss": 0.5763, - "step": 66460 - }, - { - "epoch": 0.5876164712954614, - "grad_norm": 1.5925779342651367, - "learning_rate": 4.020639214507564e-05, - "loss": 0.7092, - "step": 66470 - }, - { - "epoch": 0.5877048745557736, - "grad_norm": 2.24004864692688, - "learning_rate": 4.020491875740378e-05, - "loss": 0.7717, - "step": 66480 - }, - { - "epoch": 0.5877932778160858, - "grad_norm": 9.111332893371582, - "learning_rate": 4.0203445369731906e-05, - "loss": 0.657, - "step": 66490 - }, - { - "epoch": 0.587881681076398, - "grad_norm": 4.095296382904053, - "learning_rate": 4.0201971982060034e-05, - "loss": 0.6275, - "step": 66500 - }, - { - "epoch": 0.5879700843367104, - "grad_norm": 1.959965705871582, - "learning_rate": 4.020049859438816e-05, - "loss": 0.6269, - "step": 66510 - }, - { - "epoch": 0.5880584875970226, - "grad_norm": 5.743034362792969, - "learning_rate": 4.019902520671629e-05, - "loss": 0.6759, - "step": 66520 - }, - { - "epoch": 0.5881468908573348, - "grad_norm": 2.343559503555298, - "learning_rate": 4.019755181904442e-05, - "loss": 0.7763, - "step": 66530 - }, - { - "epoch": 0.5882352941176471, - "grad_norm": 4.0077362060546875, - "learning_rate": 4.0196078431372555e-05, - "loss": 0.7834, - "step": 66540 - }, - { - "epoch": 0.5883236973779593, - "grad_norm": 4.8376946449279785, - "learning_rate": 4.0194605043700676e-05, - "loss": 0.6889, - "step": 66550 - }, - { - "epoch": 0.5884121006382715, - "grad_norm": 4.436862468719482, - "learning_rate": 4.019313165602881e-05, - "loss": 0.5604, - "step": 66560 - }, - { - "epoch": 0.5885005038985838, - "grad_norm": 18.047977447509766, - "learning_rate": 4.019165826835694e-05, - "loss": 0.7222, - "step": 66570 - }, - { - "epoch": 0.588588907158896, - "grad_norm": 0.8846916556358337, - "learning_rate": 4.019018488068507e-05, - "loss": 0.6758, - "step": 66580 - }, - { - "epoch": 0.5886773104192082, - "grad_norm": 2.162344217300415, - "learning_rate": 4.0188711493013196e-05, - "loss": 0.6119, - "step": 66590 - }, - { - "epoch": 0.5887657136795205, - "grad_norm": 1.688251256942749, - "learning_rate": 4.018723810534133e-05, - "loss": 0.8133, - "step": 66600 - }, - { - "epoch": 0.5888541169398327, - "grad_norm": 4.278174877166748, - "learning_rate": 4.018576471766945e-05, - "loss": 0.8368, - "step": 66610 - }, - { - "epoch": 0.588942520200145, - "grad_norm": 6.618953227996826, - "learning_rate": 4.018429132999759e-05, - "loss": 0.7329, - "step": 66620 - }, - { - "epoch": 0.5890309234604573, - "grad_norm": 8.79420280456543, - "learning_rate": 4.018281794232571e-05, - "loss": 0.6397, - "step": 66630 - }, - { - "epoch": 0.5891193267207695, - "grad_norm": 4.479436874389648, - "learning_rate": 4.0181344554653845e-05, - "loss": 0.7685, - "step": 66640 - }, - { - "epoch": 0.5892077299810817, - "grad_norm": 0.97049880027771, - "learning_rate": 4.017987116698197e-05, - "loss": 0.6728, - "step": 66650 - }, - { - "epoch": 0.589296133241394, - "grad_norm": 2.2249491214752197, - "learning_rate": 4.01783977793101e-05, - "loss": 0.5655, - "step": 66660 - }, - { - "epoch": 0.5893845365017062, - "grad_norm": 6.43360710144043, - "learning_rate": 4.017692439163823e-05, - "loss": 0.6488, - "step": 66670 - }, - { - "epoch": 0.5894729397620184, - "grad_norm": 2.6443846225738525, - "learning_rate": 4.0175451003966365e-05, - "loss": 0.7102, - "step": 66680 - }, - { - "epoch": 0.5895613430223307, - "grad_norm": 5.7051286697387695, - "learning_rate": 4.017397761629449e-05, - "loss": 0.8128, - "step": 66690 - }, - { - "epoch": 0.5896497462826429, - "grad_norm": 1.7258425951004028, - "learning_rate": 4.017250422862262e-05, - "loss": 0.7701, - "step": 66700 - }, - { - "epoch": 0.5897381495429551, - "grad_norm": 3.6739070415496826, - "learning_rate": 4.017103084095075e-05, - "loss": 0.5741, - "step": 66710 - }, - { - "epoch": 0.5898265528032673, - "grad_norm": 1.321820855140686, - "learning_rate": 4.016955745327888e-05, - "loss": 0.622, - "step": 66720 - }, - { - "epoch": 0.5899149560635796, - "grad_norm": 2.5632264614105225, - "learning_rate": 4.016808406560701e-05, - "loss": 0.5827, - "step": 66730 - }, - { - "epoch": 0.5900033593238919, - "grad_norm": 2.0769879817962646, - "learning_rate": 4.0166610677935135e-05, - "loss": 0.7996, - "step": 66740 - }, - { - "epoch": 0.5900917625842041, - "grad_norm": 6.099169731140137, - "learning_rate": 4.0165137290263264e-05, - "loss": 0.7308, - "step": 66750 - }, - { - "epoch": 0.5901801658445164, - "grad_norm": 17.474607467651367, - "learning_rate": 4.01636639025914e-05, - "loss": 0.6336, - "step": 66760 - }, - { - "epoch": 0.5902685691048286, - "grad_norm": 4.201455116271973, - "learning_rate": 4.016219051491952e-05, - "loss": 0.7295, - "step": 66770 - }, - { - "epoch": 0.5903569723651408, - "grad_norm": 3.24267315864563, - "learning_rate": 4.0160717127247655e-05, - "loss": 0.6257, - "step": 66780 - }, - { - "epoch": 0.5904453756254531, - "grad_norm": 7.121216773986816, - "learning_rate": 4.0159243739575784e-05, - "loss": 0.6715, - "step": 66790 - }, - { - "epoch": 0.5905337788857653, - "grad_norm": 1.9864522218704224, - "learning_rate": 4.015777035190391e-05, - "loss": 0.7091, - "step": 66800 - }, - { - "epoch": 0.5906221821460775, - "grad_norm": 1.4397283792495728, - "learning_rate": 4.015629696423204e-05, - "loss": 0.7288, - "step": 66810 - }, - { - "epoch": 0.5907105854063898, - "grad_norm": 10.089944839477539, - "learning_rate": 4.0154823576560176e-05, - "loss": 0.5199, - "step": 66820 - }, - { - "epoch": 0.590798988666702, - "grad_norm": 2.347733974456787, - "learning_rate": 4.01533501888883e-05, - "loss": 0.7035, - "step": 66830 - }, - { - "epoch": 0.5908873919270142, - "grad_norm": 6.436496257781982, - "learning_rate": 4.015187680121643e-05, - "loss": 0.7507, - "step": 66840 - }, - { - "epoch": 0.5909757951873265, - "grad_norm": 2.4167637825012207, - "learning_rate": 4.015040341354456e-05, - "loss": 0.6771, - "step": 66850 - }, - { - "epoch": 0.5910641984476388, - "grad_norm": 18.951627731323242, - "learning_rate": 4.014893002587269e-05, - "loss": 0.7464, - "step": 66860 - }, - { - "epoch": 0.591152601707951, - "grad_norm": 3.278839588165283, - "learning_rate": 4.014745663820082e-05, - "loss": 0.6733, - "step": 66870 - }, - { - "epoch": 0.5912410049682633, - "grad_norm": 1.3182076215744019, - "learning_rate": 4.0145983250528946e-05, - "loss": 0.5269, - "step": 66880 - }, - { - "epoch": 0.5913294082285755, - "grad_norm": 15.219719886779785, - "learning_rate": 4.0144509862857074e-05, - "loss": 0.6556, - "step": 66890 - }, - { - "epoch": 0.5914178114888877, - "grad_norm": 2.8584930896759033, - "learning_rate": 4.014303647518521e-05, - "loss": 0.6389, - "step": 66900 - }, - { - "epoch": 0.5915062147492, - "grad_norm": 2.181023597717285, - "learning_rate": 4.014156308751334e-05, - "loss": 0.7625, - "step": 66910 - }, - { - "epoch": 0.5915946180095122, - "grad_norm": 11.575806617736816, - "learning_rate": 4.0140089699841466e-05, - "loss": 0.6453, - "step": 66920 - }, - { - "epoch": 0.5916830212698244, - "grad_norm": 4.198673248291016, - "learning_rate": 4.0138616312169594e-05, - "loss": 0.7204, - "step": 66930 - }, - { - "epoch": 0.5917714245301366, - "grad_norm": 6.410655975341797, - "learning_rate": 4.013714292449772e-05, - "loss": 0.7021, - "step": 66940 - }, - { - "epoch": 0.5918598277904489, - "grad_norm": 5.004095077514648, - "learning_rate": 4.013566953682585e-05, - "loss": 0.8387, - "step": 66950 - }, - { - "epoch": 0.5919482310507611, - "grad_norm": 4.536837577819824, - "learning_rate": 4.0134196149153986e-05, - "loss": 0.621, - "step": 66960 - }, - { - "epoch": 0.5920366343110733, - "grad_norm": 2.161701202392578, - "learning_rate": 4.0132722761482114e-05, - "loss": 0.603, - "step": 66970 - }, - { - "epoch": 0.5921250375713857, - "grad_norm": 2.361048698425293, - "learning_rate": 4.013124937381024e-05, - "loss": 0.7566, - "step": 66980 - }, - { - "epoch": 0.5922134408316979, - "grad_norm": 13.218186378479004, - "learning_rate": 4.012977598613837e-05, - "loss": 0.6942, - "step": 66990 - }, - { - "epoch": 0.5923018440920101, - "grad_norm": 2.0734739303588867, - "learning_rate": 4.01283025984665e-05, - "loss": 0.6897, - "step": 67000 - }, - { - "epoch": 0.5923902473523224, - "grad_norm": 3.272113561630249, - "learning_rate": 4.012682921079463e-05, - "loss": 0.572, - "step": 67010 - }, - { - "epoch": 0.5924786506126346, - "grad_norm": 2.3308887481689453, - "learning_rate": 4.0125355823122756e-05, - "loss": 0.5531, - "step": 67020 - }, - { - "epoch": 0.5925670538729468, - "grad_norm": 4.321115493774414, - "learning_rate": 4.012388243545089e-05, - "loss": 0.5591, - "step": 67030 - }, - { - "epoch": 0.5926554571332591, - "grad_norm": 9.157179832458496, - "learning_rate": 4.012240904777902e-05, - "loss": 0.6473, - "step": 67040 - }, - { - "epoch": 0.5927438603935713, - "grad_norm": 3.5586681365966797, - "learning_rate": 4.012093566010715e-05, - "loss": 0.7633, - "step": 67050 - }, - { - "epoch": 0.5928322636538835, - "grad_norm": 3.897705078125, - "learning_rate": 4.0119462272435276e-05, - "loss": 0.7359, - "step": 67060 - }, - { - "epoch": 0.5929206669141958, - "grad_norm": 4.001584053039551, - "learning_rate": 4.0117988884763405e-05, - "loss": 0.7523, - "step": 67070 - }, - { - "epoch": 0.593009070174508, - "grad_norm": 1.5126879215240479, - "learning_rate": 4.011651549709153e-05, - "loss": 0.6526, - "step": 67080 - }, - { - "epoch": 0.5930974734348202, - "grad_norm": 1.7397924661636353, - "learning_rate": 4.011504210941967e-05, - "loss": 0.6974, - "step": 67090 - }, - { - "epoch": 0.5931858766951326, - "grad_norm": 6.736157417297363, - "learning_rate": 4.011356872174779e-05, - "loss": 0.7651, - "step": 67100 - }, - { - "epoch": 0.5932742799554448, - "grad_norm": 4.01091194152832, - "learning_rate": 4.0112095334075925e-05, - "loss": 0.6184, - "step": 67110 - }, - { - "epoch": 0.593362683215757, - "grad_norm": 3.457923173904419, - "learning_rate": 4.011062194640405e-05, - "loss": 0.6837, - "step": 67120 - }, - { - "epoch": 0.5934510864760693, - "grad_norm": 2.8803300857543945, - "learning_rate": 4.010914855873218e-05, - "loss": 0.512, - "step": 67130 - }, - { - "epoch": 0.5935394897363815, - "grad_norm": 3.0528881549835205, - "learning_rate": 4.010767517106031e-05, - "loss": 0.7376, - "step": 67140 - }, - { - "epoch": 0.5936278929966937, - "grad_norm": 7.523730278015137, - "learning_rate": 4.0106201783388445e-05, - "loss": 0.7138, - "step": 67150 - }, - { - "epoch": 0.593716296257006, - "grad_norm": 1.6315839290618896, - "learning_rate": 4.010472839571657e-05, - "loss": 0.679, - "step": 67160 - }, - { - "epoch": 0.5938046995173182, - "grad_norm": 1.6547417640686035, - "learning_rate": 4.01032550080447e-05, - "loss": 0.7017, - "step": 67170 - }, - { - "epoch": 0.5938931027776304, - "grad_norm": 5.377217769622803, - "learning_rate": 4.010178162037283e-05, - "loss": 0.6217, - "step": 67180 - }, - { - "epoch": 0.5939815060379426, - "grad_norm": 2.67793869972229, - "learning_rate": 4.010030823270096e-05, - "loss": 0.6974, - "step": 67190 - }, - { - "epoch": 0.5940699092982549, - "grad_norm": 1.984525442123413, - "learning_rate": 4.009883484502909e-05, - "loss": 0.7356, - "step": 67200 - }, - { - "epoch": 0.5941583125585672, - "grad_norm": 4.21031379699707, - "learning_rate": 4.0097361457357215e-05, - "loss": 0.6745, - "step": 67210 - }, - { - "epoch": 0.5942467158188794, - "grad_norm": 3.056952476501465, - "learning_rate": 4.0095888069685344e-05, - "loss": 0.7835, - "step": 67220 - }, - { - "epoch": 0.5943351190791917, - "grad_norm": 5.154651165008545, - "learning_rate": 4.009441468201348e-05, - "loss": 0.8265, - "step": 67230 - }, - { - "epoch": 0.5944235223395039, - "grad_norm": 8.108997344970703, - "learning_rate": 4.00929412943416e-05, - "loss": 0.7139, - "step": 67240 - }, - { - "epoch": 0.5945119255998161, - "grad_norm": 2.9724066257476807, - "learning_rate": 4.0091467906669735e-05, - "loss": 0.7208, - "step": 67250 - }, - { - "epoch": 0.5946003288601284, - "grad_norm": 4.48063850402832, - "learning_rate": 4.0089994518997864e-05, - "loss": 0.6936, - "step": 67260 - }, - { - "epoch": 0.5946887321204406, - "grad_norm": 4.644166469573975, - "learning_rate": 4.008852113132599e-05, - "loss": 0.6472, - "step": 67270 - }, - { - "epoch": 0.5947771353807528, - "grad_norm": 5.567645072937012, - "learning_rate": 4.008704774365412e-05, - "loss": 0.6021, - "step": 67280 - }, - { - "epoch": 0.5948655386410651, - "grad_norm": 6.615933895111084, - "learning_rate": 4.0085574355982256e-05, - "loss": 0.6155, - "step": 67290 - }, - { - "epoch": 0.5949539419013773, - "grad_norm": 2.456272840499878, - "learning_rate": 4.008410096831038e-05, - "loss": 0.7682, - "step": 67300 - }, - { - "epoch": 0.5950423451616895, - "grad_norm": 1.886036992073059, - "learning_rate": 4.008262758063851e-05, - "loss": 0.5989, - "step": 67310 - }, - { - "epoch": 0.5951307484220018, - "grad_norm": 3.0485422611236572, - "learning_rate": 4.008115419296664e-05, - "loss": 0.7711, - "step": 67320 - }, - { - "epoch": 0.5952191516823141, - "grad_norm": 1.8456193208694458, - "learning_rate": 4.007968080529477e-05, - "loss": 0.6806, - "step": 67330 - }, - { - "epoch": 0.5953075549426263, - "grad_norm": 8.89923095703125, - "learning_rate": 4.00782074176229e-05, - "loss": 0.6393, - "step": 67340 - }, - { - "epoch": 0.5953959582029386, - "grad_norm": 5.349854469299316, - "learning_rate": 4.0076734029951026e-05, - "loss": 0.7292, - "step": 67350 - }, - { - "epoch": 0.5954843614632508, - "grad_norm": 1.8010096549987793, - "learning_rate": 4.0075260642279154e-05, - "loss": 0.8465, - "step": 67360 - }, - { - "epoch": 0.595572764723563, - "grad_norm": 2.808081865310669, - "learning_rate": 4.007378725460729e-05, - "loss": 0.7319, - "step": 67370 - }, - { - "epoch": 0.5956611679838753, - "grad_norm": 2.711019515991211, - "learning_rate": 4.007231386693541e-05, - "loss": 0.7204, - "step": 67380 - }, - { - "epoch": 0.5957495712441875, - "grad_norm": 1.0074832439422607, - "learning_rate": 4.0070840479263546e-05, - "loss": 0.6799, - "step": 67390 - }, - { - "epoch": 0.5958379745044997, - "grad_norm": 2.0530591011047363, - "learning_rate": 4.0069367091591674e-05, - "loss": 0.702, - "step": 67400 - }, - { - "epoch": 0.5959263777648119, - "grad_norm": 1.8472235202789307, - "learning_rate": 4.00678937039198e-05, - "loss": 0.8623, - "step": 67410 - }, - { - "epoch": 0.5960147810251242, - "grad_norm": 3.2525367736816406, - "learning_rate": 4.006642031624793e-05, - "loss": 0.6399, - "step": 67420 - }, - { - "epoch": 0.5961031842854364, - "grad_norm": 4.3894171714782715, - "learning_rate": 4.0064946928576066e-05, - "loss": 0.7461, - "step": 67430 - }, - { - "epoch": 0.5961915875457486, - "grad_norm": 1.209202766418457, - "learning_rate": 4.006347354090419e-05, - "loss": 0.658, - "step": 67440 - }, - { - "epoch": 0.596279990806061, - "grad_norm": 5.785106658935547, - "learning_rate": 4.006200015323232e-05, - "loss": 0.6539, - "step": 67450 - }, - { - "epoch": 0.5963683940663732, - "grad_norm": 1.5607560873031616, - "learning_rate": 4.0060526765560444e-05, - "loss": 0.7356, - "step": 67460 - }, - { - "epoch": 0.5964567973266854, - "grad_norm": 1.5569267272949219, - "learning_rate": 4.005905337788858e-05, - "loss": 0.6567, - "step": 67470 - }, - { - "epoch": 0.5965452005869977, - "grad_norm": 1.541078805923462, - "learning_rate": 4.005757999021671e-05, - "loss": 0.6104, - "step": 67480 - }, - { - "epoch": 0.5966336038473099, - "grad_norm": 10.697413444519043, - "learning_rate": 4.0056106602544836e-05, - "loss": 0.6446, - "step": 67490 - }, - { - "epoch": 0.5967220071076221, - "grad_norm": 8.067002296447754, - "learning_rate": 4.0054633214872965e-05, - "loss": 0.5597, - "step": 67500 - }, - { - "epoch": 0.5968104103679344, - "grad_norm": 2.9999191761016846, - "learning_rate": 4.00531598272011e-05, - "loss": 0.743, - "step": 67510 - }, - { - "epoch": 0.5968988136282466, - "grad_norm": 3.6837759017944336, - "learning_rate": 4.005168643952922e-05, - "loss": 0.6818, - "step": 67520 - }, - { - "epoch": 0.5969872168885588, - "grad_norm": 1.8458577394485474, - "learning_rate": 4.0050213051857356e-05, - "loss": 0.8241, - "step": 67530 - }, - { - "epoch": 0.5970756201488711, - "grad_norm": 9.878037452697754, - "learning_rate": 4.0048739664185485e-05, - "loss": 0.5651, - "step": 67540 - }, - { - "epoch": 0.5971640234091833, - "grad_norm": 5.5965166091918945, - "learning_rate": 4.004726627651361e-05, - "loss": 0.6289, - "step": 67550 - }, - { - "epoch": 0.5972524266694955, - "grad_norm": 11.003169059753418, - "learning_rate": 4.004579288884174e-05, - "loss": 0.7754, - "step": 67560 - }, - { - "epoch": 0.5973408299298079, - "grad_norm": 3.590615749359131, - "learning_rate": 4.004431950116987e-05, - "loss": 0.5968, - "step": 67570 - }, - { - "epoch": 0.5974292331901201, - "grad_norm": 9.921289443969727, - "learning_rate": 4.0042846113498e-05, - "loss": 0.6193, - "step": 67580 - }, - { - "epoch": 0.5975176364504323, - "grad_norm": 6.370003700256348, - "learning_rate": 4.004137272582613e-05, - "loss": 0.6825, - "step": 67590 - }, - { - "epoch": 0.5976060397107446, - "grad_norm": 5.396376609802246, - "learning_rate": 4.0039899338154255e-05, - "loss": 0.6942, - "step": 67600 - }, - { - "epoch": 0.5976944429710568, - "grad_norm": 4.06182336807251, - "learning_rate": 4.003842595048239e-05, - "loss": 0.6383, - "step": 67610 - }, - { - "epoch": 0.597782846231369, - "grad_norm": 2.475921630859375, - "learning_rate": 4.003695256281052e-05, - "loss": 0.718, - "step": 67620 - }, - { - "epoch": 0.5978712494916812, - "grad_norm": 11.372515678405762, - "learning_rate": 4.003547917513865e-05, - "loss": 0.7232, - "step": 67630 - }, - { - "epoch": 0.5979596527519935, - "grad_norm": 2.693408250808716, - "learning_rate": 4.0034005787466775e-05, - "loss": 0.7385, - "step": 67640 - }, - { - "epoch": 0.5980480560123057, - "grad_norm": 6.491113662719727, - "learning_rate": 4.003253239979491e-05, - "loss": 0.7132, - "step": 67650 - }, - { - "epoch": 0.5981364592726179, - "grad_norm": 1.7727231979370117, - "learning_rate": 4.003105901212303e-05, - "loss": 0.6441, - "step": 67660 - }, - { - "epoch": 0.5982248625329302, - "grad_norm": 4.259739398956299, - "learning_rate": 4.002958562445117e-05, - "loss": 0.8155, - "step": 67670 - }, - { - "epoch": 0.5983132657932425, - "grad_norm": 0.9205679893493652, - "learning_rate": 4.002811223677929e-05, - "loss": 0.6162, - "step": 67680 - }, - { - "epoch": 0.5984016690535547, - "grad_norm": 1.4040359258651733, - "learning_rate": 4.0026638849107424e-05, - "loss": 0.6264, - "step": 67690 - }, - { - "epoch": 0.598490072313867, - "grad_norm": 3.6029984951019287, - "learning_rate": 4.002516546143555e-05, - "loss": 0.8111, - "step": 67700 - }, - { - "epoch": 0.5985784755741792, - "grad_norm": 4.135036945343018, - "learning_rate": 4.002369207376368e-05, - "loss": 0.7476, - "step": 67710 - }, - { - "epoch": 0.5986668788344914, - "grad_norm": 2.148099899291992, - "learning_rate": 4.002221868609181e-05, - "loss": 0.7191, - "step": 67720 - }, - { - "epoch": 0.5987552820948037, - "grad_norm": 2.440849781036377, - "learning_rate": 4.0020745298419944e-05, - "loss": 0.7065, - "step": 67730 - }, - { - "epoch": 0.5988436853551159, - "grad_norm": 7.0985589027404785, - "learning_rate": 4.0019271910748065e-05, - "loss": 0.5483, - "step": 67740 - }, - { - "epoch": 0.5989320886154281, - "grad_norm": 6.353954315185547, - "learning_rate": 4.00177985230762e-05, - "loss": 0.7893, - "step": 67750 - }, - { - "epoch": 0.5990204918757404, - "grad_norm": 5.931369781494141, - "learning_rate": 4.001632513540433e-05, - "loss": 0.7944, - "step": 67760 - }, - { - "epoch": 0.5991088951360526, - "grad_norm": 1.0348714590072632, - "learning_rate": 4.001485174773246e-05, - "loss": 0.6199, - "step": 67770 - }, - { - "epoch": 0.5991972983963648, - "grad_norm": 3.4274654388427734, - "learning_rate": 4.0013378360060586e-05, - "loss": 0.7245, - "step": 67780 - }, - { - "epoch": 0.599285701656677, - "grad_norm": 2.3413896560668945, - "learning_rate": 4.001190497238872e-05, - "loss": 0.6347, - "step": 67790 - }, - { - "epoch": 0.5993741049169894, - "grad_norm": 1.481231451034546, - "learning_rate": 4.001043158471684e-05, - "loss": 0.6589, - "step": 67800 - }, - { - "epoch": 0.5994625081773016, - "grad_norm": 3.3175745010375977, - "learning_rate": 4.000895819704498e-05, - "loss": 0.5997, - "step": 67810 - }, - { - "epoch": 0.5995509114376139, - "grad_norm": 3.3511624336242676, - "learning_rate": 4.0007484809373106e-05, - "loss": 0.6646, - "step": 67820 - }, - { - "epoch": 0.5996393146979261, - "grad_norm": 6.857593536376953, - "learning_rate": 4.0006011421701234e-05, - "loss": 0.5203, - "step": 67830 - }, - { - "epoch": 0.5997277179582383, - "grad_norm": 2.252788543701172, - "learning_rate": 4.000453803402936e-05, - "loss": 0.7173, - "step": 67840 - }, - { - "epoch": 0.5998161212185505, - "grad_norm": 2.993269205093384, - "learning_rate": 4.000306464635749e-05, - "loss": 0.7273, - "step": 67850 - }, - { - "epoch": 0.5999045244788628, - "grad_norm": 7.120776653289795, - "learning_rate": 4.000159125868562e-05, - "loss": 0.7521, - "step": 67860 - }, - { - "epoch": 0.599992927739175, - "grad_norm": 1.7486554384231567, - "learning_rate": 4.0000117871013754e-05, - "loss": 0.6977, - "step": 67870 - }, - { - "epoch": 0.6000813309994872, - "grad_norm": 1.0409510135650635, - "learning_rate": 3.999864448334188e-05, - "loss": 0.739, - "step": 67880 - }, - { - "epoch": 0.6001697342597995, - "grad_norm": 12.113892555236816, - "learning_rate": 3.999717109567001e-05, - "loss": 0.81, - "step": 67890 - }, - { - "epoch": 0.6002581375201117, - "grad_norm": 5.20490837097168, - "learning_rate": 3.999569770799814e-05, - "loss": 0.7397, - "step": 67900 - }, - { - "epoch": 0.6003465407804239, - "grad_norm": 3.258568286895752, - "learning_rate": 3.999422432032627e-05, - "loss": 0.7813, - "step": 67910 - }, - { - "epoch": 0.6004349440407363, - "grad_norm": 9.120695114135742, - "learning_rate": 3.9992750932654396e-05, - "loss": 0.6967, - "step": 67920 - }, - { - "epoch": 0.6005233473010485, - "grad_norm": 2.6310369968414307, - "learning_rate": 3.9991277544982524e-05, - "loss": 0.6519, - "step": 67930 - }, - { - "epoch": 0.6006117505613607, - "grad_norm": 2.5960001945495605, - "learning_rate": 3.998980415731066e-05, - "loss": 0.8263, - "step": 67940 - }, - { - "epoch": 0.600700153821673, - "grad_norm": 4.554094314575195, - "learning_rate": 3.998833076963879e-05, - "loss": 0.8485, - "step": 67950 - }, - { - "epoch": 0.6007885570819852, - "grad_norm": 1.8861922025680542, - "learning_rate": 3.9986857381966916e-05, - "loss": 0.6734, - "step": 67960 - }, - { - "epoch": 0.6008769603422974, - "grad_norm": 7.652975559234619, - "learning_rate": 3.9985383994295045e-05, - "loss": 0.678, - "step": 67970 - }, - { - "epoch": 0.6009653636026097, - "grad_norm": 3.88148832321167, - "learning_rate": 3.998391060662317e-05, - "loss": 0.5662, - "step": 67980 - }, - { - "epoch": 0.6010537668629219, - "grad_norm": 2.120248556137085, - "learning_rate": 3.99824372189513e-05, - "loss": 0.7374, - "step": 67990 - }, - { - "epoch": 0.6011421701232341, - "grad_norm": 4.1534199714660645, - "learning_rate": 3.9980963831279437e-05, - "loss": 0.5647, - "step": 68000 - }, - { - "epoch": 0.6012305733835464, - "grad_norm": 1.922553300857544, - "learning_rate": 3.9979490443607565e-05, - "loss": 0.7215, - "step": 68010 - }, - { - "epoch": 0.6013189766438586, - "grad_norm": 13.003496170043945, - "learning_rate": 3.997801705593569e-05, - "loss": 0.5849, - "step": 68020 - }, - { - "epoch": 0.6014073799041708, - "grad_norm": 2.862215995788574, - "learning_rate": 3.997654366826382e-05, - "loss": 0.6946, - "step": 68030 - }, - { - "epoch": 0.6014957831644832, - "grad_norm": 3.0106401443481445, - "learning_rate": 3.997507028059195e-05, - "loss": 0.6255, - "step": 68040 - }, - { - "epoch": 0.6015841864247954, - "grad_norm": 1.2612980604171753, - "learning_rate": 3.997359689292008e-05, - "loss": 0.7444, - "step": 68050 - }, - { - "epoch": 0.6016725896851076, - "grad_norm": 1.9860903024673462, - "learning_rate": 3.9972123505248213e-05, - "loss": 0.7292, - "step": 68060 - }, - { - "epoch": 0.6017609929454198, - "grad_norm": 3.843406915664673, - "learning_rate": 3.9970650117576335e-05, - "loss": 0.7389, - "step": 68070 - }, - { - "epoch": 0.6018493962057321, - "grad_norm": 7.409058570861816, - "learning_rate": 3.996917672990447e-05, - "loss": 0.6841, - "step": 68080 - }, - { - "epoch": 0.6019377994660443, - "grad_norm": 3.4304847717285156, - "learning_rate": 3.99677033422326e-05, - "loss": 0.6102, - "step": 68090 - }, - { - "epoch": 0.6020262027263565, - "grad_norm": 5.2471537590026855, - "learning_rate": 3.996622995456073e-05, - "loss": 0.7516, - "step": 68100 - }, - { - "epoch": 0.6021146059866688, - "grad_norm": 3.076887607574463, - "learning_rate": 3.9964756566888855e-05, - "loss": 0.5522, - "step": 68110 - }, - { - "epoch": 0.602203009246981, - "grad_norm": 1.0112590789794922, - "learning_rate": 3.996328317921699e-05, - "loss": 0.5902, - "step": 68120 - }, - { - "epoch": 0.6022914125072932, - "grad_norm": 4.697880268096924, - "learning_rate": 3.996180979154511e-05, - "loss": 0.6159, - "step": 68130 - }, - { - "epoch": 0.6023798157676055, - "grad_norm": 1.8803813457489014, - "learning_rate": 3.996033640387325e-05, - "loss": 0.6603, - "step": 68140 - }, - { - "epoch": 0.6024682190279177, - "grad_norm": 1.8947783708572388, - "learning_rate": 3.9958863016201375e-05, - "loss": 0.4859, - "step": 68150 - }, - { - "epoch": 0.60255662228823, - "grad_norm": 2.829491376876831, - "learning_rate": 3.9957389628529504e-05, - "loss": 0.5707, - "step": 68160 - }, - { - "epoch": 0.6026450255485423, - "grad_norm": 5.179346561431885, - "learning_rate": 3.995591624085763e-05, - "loss": 0.6871, - "step": 68170 - }, - { - "epoch": 0.6027334288088545, - "grad_norm": 1.9947444200515747, - "learning_rate": 3.995444285318576e-05, - "loss": 0.6344, - "step": 68180 - }, - { - "epoch": 0.6028218320691667, - "grad_norm": 3.2611663341522217, - "learning_rate": 3.995296946551389e-05, - "loss": 0.7923, - "step": 68190 - }, - { - "epoch": 0.602910235329479, - "grad_norm": 10.408723831176758, - "learning_rate": 3.9951496077842024e-05, - "loss": 0.5766, - "step": 68200 - }, - { - "epoch": 0.6029986385897912, - "grad_norm": 2.6321170330047607, - "learning_rate": 3.9950022690170145e-05, - "loss": 0.8093, - "step": 68210 - }, - { - "epoch": 0.6030870418501034, - "grad_norm": 1.8001887798309326, - "learning_rate": 3.994854930249828e-05, - "loss": 0.7165, - "step": 68220 - }, - { - "epoch": 0.6031754451104157, - "grad_norm": 4.607061862945557, - "learning_rate": 3.994707591482641e-05, - "loss": 0.6837, - "step": 68230 - }, - { - "epoch": 0.6032638483707279, - "grad_norm": 3.9257895946502686, - "learning_rate": 3.994560252715454e-05, - "loss": 0.6992, - "step": 68240 - }, - { - "epoch": 0.6033522516310401, - "grad_norm": 1.7141555547714233, - "learning_rate": 3.9944129139482666e-05, - "loss": 0.613, - "step": 68250 - }, - { - "epoch": 0.6034406548913523, - "grad_norm": 3.1083195209503174, - "learning_rate": 3.99426557518108e-05, - "loss": 0.6735, - "step": 68260 - }, - { - "epoch": 0.6035290581516647, - "grad_norm": 1.1760464906692505, - "learning_rate": 3.994118236413892e-05, - "loss": 0.6472, - "step": 68270 - }, - { - "epoch": 0.6036174614119769, - "grad_norm": 4.06918478012085, - "learning_rate": 3.993970897646706e-05, - "loss": 0.6039, - "step": 68280 - }, - { - "epoch": 0.6037058646722891, - "grad_norm": 3.1744275093078613, - "learning_rate": 3.993823558879518e-05, - "loss": 0.6446, - "step": 68290 - }, - { - "epoch": 0.6037942679326014, - "grad_norm": 0.9572000503540039, - "learning_rate": 3.9936762201123314e-05, - "loss": 0.606, - "step": 68300 - }, - { - "epoch": 0.6038826711929136, - "grad_norm": 3.2773358821868896, - "learning_rate": 3.993528881345144e-05, - "loss": 0.5472, - "step": 68310 - }, - { - "epoch": 0.6039710744532258, - "grad_norm": 5.722139835357666, - "learning_rate": 3.993381542577957e-05, - "loss": 0.5986, - "step": 68320 - }, - { - "epoch": 0.6040594777135381, - "grad_norm": 4.527989387512207, - "learning_rate": 3.99323420381077e-05, - "loss": 0.6804, - "step": 68330 - }, - { - "epoch": 0.6041478809738503, - "grad_norm": 3.540149211883545, - "learning_rate": 3.9930868650435834e-05, - "loss": 0.7221, - "step": 68340 - }, - { - "epoch": 0.6042362842341625, - "grad_norm": 4.579570293426514, - "learning_rate": 3.9929395262763956e-05, - "loss": 0.7591, - "step": 68350 - }, - { - "epoch": 0.6043246874944748, - "grad_norm": 7.313347339630127, - "learning_rate": 3.992792187509209e-05, - "loss": 0.7487, - "step": 68360 - }, - { - "epoch": 0.604413090754787, - "grad_norm": 2.8706626892089844, - "learning_rate": 3.992644848742022e-05, - "loss": 0.7745, - "step": 68370 - }, - { - "epoch": 0.6045014940150992, - "grad_norm": 2.8275105953216553, - "learning_rate": 3.992497509974835e-05, - "loss": 0.5839, - "step": 68380 - }, - { - "epoch": 0.6045898972754116, - "grad_norm": 3.384598731994629, - "learning_rate": 3.9923501712076476e-05, - "loss": 0.5708, - "step": 68390 - }, - { - "epoch": 0.6046783005357238, - "grad_norm": 2.156449794769287, - "learning_rate": 3.9922028324404605e-05, - "loss": 0.8107, - "step": 68400 - }, - { - "epoch": 0.604766703796036, - "grad_norm": 4.001865386962891, - "learning_rate": 3.992055493673273e-05, - "loss": 0.631, - "step": 68410 - }, - { - "epoch": 0.6048551070563483, - "grad_norm": 7.301394939422607, - "learning_rate": 3.991908154906087e-05, - "loss": 0.6151, - "step": 68420 - }, - { - "epoch": 0.6049435103166605, - "grad_norm": 4.076961517333984, - "learning_rate": 3.991760816138899e-05, - "loss": 0.6588, - "step": 68430 - }, - { - "epoch": 0.6050319135769727, - "grad_norm": 2.770707845687866, - "learning_rate": 3.9916134773717125e-05, - "loss": 0.5893, - "step": 68440 - }, - { - "epoch": 0.605120316837285, - "grad_norm": 6.907416820526123, - "learning_rate": 3.991466138604525e-05, - "loss": 0.7231, - "step": 68450 - }, - { - "epoch": 0.6052087200975972, - "grad_norm": 7.347880840301514, - "learning_rate": 3.991318799837338e-05, - "loss": 0.5238, - "step": 68460 - }, - { - "epoch": 0.6052971233579094, - "grad_norm": 3.90139102935791, - "learning_rate": 3.991171461070151e-05, - "loss": 0.783, - "step": 68470 - }, - { - "epoch": 0.6053855266182216, - "grad_norm": 4.252216339111328, - "learning_rate": 3.9910241223029645e-05, - "loss": 0.8325, - "step": 68480 - }, - { - "epoch": 0.6054739298785339, - "grad_norm": 5.732337474822998, - "learning_rate": 3.9908767835357767e-05, - "loss": 0.7703, - "step": 68490 - }, - { - "epoch": 0.6055623331388461, - "grad_norm": 4.272355079650879, - "learning_rate": 3.99072944476859e-05, - "loss": 0.6956, - "step": 68500 - }, - { - "epoch": 0.6056507363991585, - "grad_norm": 3.164580821990967, - "learning_rate": 3.990582106001402e-05, - "loss": 0.644, - "step": 68510 - }, - { - "epoch": 0.6057391396594707, - "grad_norm": 2.5002005100250244, - "learning_rate": 3.990434767234216e-05, - "loss": 0.6915, - "step": 68520 - }, - { - "epoch": 0.6058275429197829, - "grad_norm": 3.9687719345092773, - "learning_rate": 3.990287428467029e-05, - "loss": 0.6782, - "step": 68530 - }, - { - "epoch": 0.6059159461800951, - "grad_norm": 2.8334271907806396, - "learning_rate": 3.9901400896998415e-05, - "loss": 0.5957, - "step": 68540 - }, - { - "epoch": 0.6060043494404074, - "grad_norm": 5.97862434387207, - "learning_rate": 3.9899927509326543e-05, - "loss": 0.7612, - "step": 68550 - }, - { - "epoch": 0.6060927527007196, - "grad_norm": 2.0084502696990967, - "learning_rate": 3.989845412165468e-05, - "loss": 0.6127, - "step": 68560 - }, - { - "epoch": 0.6061811559610318, - "grad_norm": 5.48071813583374, - "learning_rate": 3.98969807339828e-05, - "loss": 0.5699, - "step": 68570 - }, - { - "epoch": 0.6062695592213441, - "grad_norm": 3.4372706413269043, - "learning_rate": 3.9895507346310935e-05, - "loss": 0.5595, - "step": 68580 - }, - { - "epoch": 0.6063579624816563, - "grad_norm": 0.9249988794326782, - "learning_rate": 3.9894033958639064e-05, - "loss": 0.6997, - "step": 68590 - }, - { - "epoch": 0.6064463657419685, - "grad_norm": 3.710207462310791, - "learning_rate": 3.989256057096719e-05, - "loss": 0.7397, - "step": 68600 - }, - { - "epoch": 0.6065347690022808, - "grad_norm": 1.0048185586929321, - "learning_rate": 3.989108718329532e-05, - "loss": 0.7025, - "step": 68610 - }, - { - "epoch": 0.606623172262593, - "grad_norm": 4.552618980407715, - "learning_rate": 3.9889613795623455e-05, - "loss": 0.7587, - "step": 68620 - }, - { - "epoch": 0.6067115755229053, - "grad_norm": 1.4993724822998047, - "learning_rate": 3.988814040795158e-05, - "loss": 0.5566, - "step": 68630 - }, - { - "epoch": 0.6067999787832176, - "grad_norm": 1.413038730621338, - "learning_rate": 3.988666702027971e-05, - "loss": 0.6957, - "step": 68640 - }, - { - "epoch": 0.6068883820435298, - "grad_norm": 3.3125061988830566, - "learning_rate": 3.9885193632607834e-05, - "loss": 0.66, - "step": 68650 - }, - { - "epoch": 0.606976785303842, - "grad_norm": 1.9941564798355103, - "learning_rate": 3.988372024493597e-05, - "loss": 0.6737, - "step": 68660 - }, - { - "epoch": 0.6070651885641543, - "grad_norm": 1.3719918727874756, - "learning_rate": 3.98822468572641e-05, - "loss": 0.6748, - "step": 68670 - }, - { - "epoch": 0.6071535918244665, - "grad_norm": 3.223843812942505, - "learning_rate": 3.9880773469592226e-05, - "loss": 0.6671, - "step": 68680 - }, - { - "epoch": 0.6072419950847787, - "grad_norm": 5.648901462554932, - "learning_rate": 3.9879300081920354e-05, - "loss": 0.6562, - "step": 68690 - }, - { - "epoch": 0.607330398345091, - "grad_norm": 8.837112426757812, - "learning_rate": 3.987782669424849e-05, - "loss": 0.6545, - "step": 68700 - }, - { - "epoch": 0.6074188016054032, - "grad_norm": 3.6554744243621826, - "learning_rate": 3.987635330657661e-05, - "loss": 0.5833, - "step": 68710 - }, - { - "epoch": 0.6075072048657154, - "grad_norm": 3.366485595703125, - "learning_rate": 3.9874879918904746e-05, - "loss": 0.743, - "step": 68720 - }, - { - "epoch": 0.6075956081260276, - "grad_norm": 6.173121452331543, - "learning_rate": 3.9873406531232874e-05, - "loss": 0.734, - "step": 68730 - }, - { - "epoch": 0.6076840113863399, - "grad_norm": 9.327568054199219, - "learning_rate": 3.9871933143561e-05, - "loss": 0.6856, - "step": 68740 - }, - { - "epoch": 0.6077724146466522, - "grad_norm": 2.837390184402466, - "learning_rate": 3.987045975588913e-05, - "loss": 0.5528, - "step": 68750 - }, - { - "epoch": 0.6078608179069644, - "grad_norm": 4.72812032699585, - "learning_rate": 3.986898636821726e-05, - "loss": 0.6144, - "step": 68760 - }, - { - "epoch": 0.6079492211672767, - "grad_norm": 4.415213108062744, - "learning_rate": 3.986751298054539e-05, - "loss": 0.6628, - "step": 68770 - }, - { - "epoch": 0.6080376244275889, - "grad_norm": 2.4548535346984863, - "learning_rate": 3.986603959287352e-05, - "loss": 0.5479, - "step": 68780 - }, - { - "epoch": 0.6081260276879011, - "grad_norm": 2.5510547161102295, - "learning_rate": 3.986456620520165e-05, - "loss": 0.6633, - "step": 68790 - }, - { - "epoch": 0.6082144309482134, - "grad_norm": 4.9136576652526855, - "learning_rate": 3.986309281752978e-05, - "loss": 0.7326, - "step": 68800 - }, - { - "epoch": 0.6083028342085256, - "grad_norm": 4.370539665222168, - "learning_rate": 3.986161942985791e-05, - "loss": 0.7098, - "step": 68810 - }, - { - "epoch": 0.6083912374688378, - "grad_norm": 2.7245564460754395, - "learning_rate": 3.9860146042186036e-05, - "loss": 0.7583, - "step": 68820 - }, - { - "epoch": 0.6084796407291501, - "grad_norm": 4.710555553436279, - "learning_rate": 3.9858672654514164e-05, - "loss": 0.6862, - "step": 68830 - }, - { - "epoch": 0.6085680439894623, - "grad_norm": 1.6268647909164429, - "learning_rate": 3.98571992668423e-05, - "loss": 0.5837, - "step": 68840 - }, - { - "epoch": 0.6086564472497745, - "grad_norm": 3.0540761947631836, - "learning_rate": 3.985572587917043e-05, - "loss": 0.684, - "step": 68850 - }, - { - "epoch": 0.6087448505100869, - "grad_norm": 2.8425979614257812, - "learning_rate": 3.9854252491498556e-05, - "loss": 0.7028, - "step": 68860 - }, - { - "epoch": 0.6088332537703991, - "grad_norm": 9.741423606872559, - "learning_rate": 3.9852779103826685e-05, - "loss": 0.6337, - "step": 68870 - }, - { - "epoch": 0.6089216570307113, - "grad_norm": 2.310828685760498, - "learning_rate": 3.985130571615481e-05, - "loss": 0.6625, - "step": 68880 - }, - { - "epoch": 0.6090100602910236, - "grad_norm": 3.778407096862793, - "learning_rate": 3.984983232848294e-05, - "loss": 0.8521, - "step": 68890 - }, - { - "epoch": 0.6090984635513358, - "grad_norm": 4.01237154006958, - "learning_rate": 3.984835894081107e-05, - "loss": 0.7701, - "step": 68900 - }, - { - "epoch": 0.609186866811648, - "grad_norm": 11.06180477142334, - "learning_rate": 3.9846885553139205e-05, - "loss": 0.6215, - "step": 68910 - }, - { - "epoch": 0.6092752700719603, - "grad_norm": 5.82162618637085, - "learning_rate": 3.984541216546733e-05, - "loss": 0.5944, - "step": 68920 - }, - { - "epoch": 0.6093636733322725, - "grad_norm": 2.3689417839050293, - "learning_rate": 3.984393877779546e-05, - "loss": 0.6031, - "step": 68930 - }, - { - "epoch": 0.6094520765925847, - "grad_norm": 5.308497905731201, - "learning_rate": 3.984246539012359e-05, - "loss": 0.7454, - "step": 68940 - }, - { - "epoch": 0.6095404798528969, - "grad_norm": 4.571074962615967, - "learning_rate": 3.984099200245172e-05, - "loss": 0.5948, - "step": 68950 - }, - { - "epoch": 0.6096288831132092, - "grad_norm": 4.445878028869629, - "learning_rate": 3.9839518614779847e-05, - "loss": 0.7984, - "step": 68960 - }, - { - "epoch": 0.6097172863735214, - "grad_norm": 3.9879441261291504, - "learning_rate": 3.983804522710798e-05, - "loss": 0.6897, - "step": 68970 - }, - { - "epoch": 0.6098056896338337, - "grad_norm": 3.2836380004882812, - "learning_rate": 3.98365718394361e-05, - "loss": 0.7139, - "step": 68980 - }, - { - "epoch": 0.609894092894146, - "grad_norm": 6.017600059509277, - "learning_rate": 3.983509845176424e-05, - "loss": 0.6724, - "step": 68990 - }, - { - "epoch": 0.6099824961544582, - "grad_norm": 3.9386825561523438, - "learning_rate": 3.983362506409237e-05, - "loss": 0.7285, - "step": 69000 - }, - { - "epoch": 0.6100708994147704, - "grad_norm": 3.628932476043701, - "learning_rate": 3.9832151676420495e-05, - "loss": 0.8888, - "step": 69010 - }, - { - "epoch": 0.6101593026750827, - "grad_norm": 1.531785011291504, - "learning_rate": 3.9830678288748623e-05, - "loss": 0.6118, - "step": 69020 - }, - { - "epoch": 0.6102477059353949, - "grad_norm": 2.324725389480591, - "learning_rate": 3.982920490107676e-05, - "loss": 0.8042, - "step": 69030 - }, - { - "epoch": 0.6103361091957071, - "grad_norm": 5.797264099121094, - "learning_rate": 3.982773151340488e-05, - "loss": 0.5666, - "step": 69040 - }, - { - "epoch": 0.6104245124560194, - "grad_norm": 10.481884002685547, - "learning_rate": 3.9826258125733015e-05, - "loss": 0.7382, - "step": 69050 - }, - { - "epoch": 0.6105129157163316, - "grad_norm": 6.266570091247559, - "learning_rate": 3.9824784738061144e-05, - "loss": 0.7971, - "step": 69060 - }, - { - "epoch": 0.6106013189766438, - "grad_norm": 2.1786258220672607, - "learning_rate": 3.982331135038927e-05, - "loss": 0.5833, - "step": 69070 - }, - { - "epoch": 0.6106897222369561, - "grad_norm": 4.0261054039001465, - "learning_rate": 3.98218379627174e-05, - "loss": 0.7022, - "step": 69080 - }, - { - "epoch": 0.6107781254972683, - "grad_norm": 1.2456425428390503, - "learning_rate": 3.9820364575045536e-05, - "loss": 0.7358, - "step": 69090 - }, - { - "epoch": 0.6108665287575806, - "grad_norm": 2.3084208965301514, - "learning_rate": 3.981889118737366e-05, - "loss": 0.7043, - "step": 69100 - }, - { - "epoch": 0.6109549320178929, - "grad_norm": 3.831892728805542, - "learning_rate": 3.981741779970179e-05, - "loss": 0.7041, - "step": 69110 - }, - { - "epoch": 0.6110433352782051, - "grad_norm": 1.9702353477478027, - "learning_rate": 3.9815944412029914e-05, - "loss": 0.6562, - "step": 69120 - }, - { - "epoch": 0.6111317385385173, - "grad_norm": 4.149004936218262, - "learning_rate": 3.981447102435805e-05, - "loss": 0.6047, - "step": 69130 - }, - { - "epoch": 0.6112201417988296, - "grad_norm": 2.6869633197784424, - "learning_rate": 3.981299763668618e-05, - "loss": 0.6753, - "step": 69140 - }, - { - "epoch": 0.6113085450591418, - "grad_norm": 9.032734870910645, - "learning_rate": 3.9811524249014306e-05, - "loss": 0.7659, - "step": 69150 - }, - { - "epoch": 0.611396948319454, - "grad_norm": 9.428448677062988, - "learning_rate": 3.9810050861342434e-05, - "loss": 0.6832, - "step": 69160 - }, - { - "epoch": 0.6114853515797662, - "grad_norm": 3.1898558139801025, - "learning_rate": 3.980857747367057e-05, - "loss": 0.5865, - "step": 69170 - }, - { - "epoch": 0.6115737548400785, - "grad_norm": 2.5670242309570312, - "learning_rate": 3.980710408599869e-05, - "loss": 0.7318, - "step": 69180 - }, - { - "epoch": 0.6116621581003907, - "grad_norm": 12.594137191772461, - "learning_rate": 3.9805630698326826e-05, - "loss": 0.6647, - "step": 69190 - }, - { - "epoch": 0.6117505613607029, - "grad_norm": 4.982483863830566, - "learning_rate": 3.9804157310654954e-05, - "loss": 0.6688, - "step": 69200 - }, - { - "epoch": 0.6118389646210152, - "grad_norm": 4.143118858337402, - "learning_rate": 3.980268392298308e-05, - "loss": 0.7305, - "step": 69210 - }, - { - "epoch": 0.6119273678813275, - "grad_norm": 3.982994794845581, - "learning_rate": 3.980121053531121e-05, - "loss": 0.5619, - "step": 69220 - }, - { - "epoch": 0.6120157711416397, - "grad_norm": 6.126901149749756, - "learning_rate": 3.979973714763934e-05, - "loss": 0.5906, - "step": 69230 - }, - { - "epoch": 0.612104174401952, - "grad_norm": 2.6012651920318604, - "learning_rate": 3.979826375996747e-05, - "loss": 0.7294, - "step": 69240 - }, - { - "epoch": 0.6121925776622642, - "grad_norm": 6.575870037078857, - "learning_rate": 3.97967903722956e-05, - "loss": 0.7193, - "step": 69250 - }, - { - "epoch": 0.6122809809225764, - "grad_norm": 1.7410662174224854, - "learning_rate": 3.9795316984623724e-05, - "loss": 0.763, - "step": 69260 - }, - { - "epoch": 0.6123693841828887, - "grad_norm": 4.49363899230957, - "learning_rate": 3.979384359695186e-05, - "loss": 0.8104, - "step": 69270 - }, - { - "epoch": 0.6124577874432009, - "grad_norm": 2.2362005710601807, - "learning_rate": 3.979237020927999e-05, - "loss": 0.6791, - "step": 69280 - }, - { - "epoch": 0.6125461907035131, - "grad_norm": 6.156050682067871, - "learning_rate": 3.9790896821608116e-05, - "loss": 0.7252, - "step": 69290 - }, - { - "epoch": 0.6126345939638254, - "grad_norm": 2.2494657039642334, - "learning_rate": 3.9789423433936245e-05, - "loss": 0.6665, - "step": 69300 - }, - { - "epoch": 0.6127229972241376, - "grad_norm": 2.3521780967712402, - "learning_rate": 3.978795004626438e-05, - "loss": 0.6314, - "step": 69310 - }, - { - "epoch": 0.6128114004844498, - "grad_norm": 2.6286988258361816, - "learning_rate": 3.97864766585925e-05, - "loss": 0.7451, - "step": 69320 - }, - { - "epoch": 0.6128998037447622, - "grad_norm": 1.6540248394012451, - "learning_rate": 3.9785003270920636e-05, - "loss": 0.5681, - "step": 69330 - }, - { - "epoch": 0.6129882070050744, - "grad_norm": 4.927855491638184, - "learning_rate": 3.978352988324876e-05, - "loss": 0.7682, - "step": 69340 - }, - { - "epoch": 0.6130766102653866, - "grad_norm": 2.7282636165618896, - "learning_rate": 3.978205649557689e-05, - "loss": 0.6492, - "step": 69350 - }, - { - "epoch": 0.6131650135256989, - "grad_norm": 1.715871810913086, - "learning_rate": 3.978058310790502e-05, - "loss": 0.6803, - "step": 69360 - }, - { - "epoch": 0.6132534167860111, - "grad_norm": 4.740761756896973, - "learning_rate": 3.977910972023315e-05, - "loss": 0.782, - "step": 69370 - }, - { - "epoch": 0.6133418200463233, - "grad_norm": 5.819037914276123, - "learning_rate": 3.977763633256128e-05, - "loss": 0.748, - "step": 69380 - }, - { - "epoch": 0.6134302233066355, - "grad_norm": 2.7251076698303223, - "learning_rate": 3.977616294488941e-05, - "loss": 0.7762, - "step": 69390 - }, - { - "epoch": 0.6135186265669478, - "grad_norm": 1.2761238813400269, - "learning_rate": 3.9774689557217535e-05, - "loss": 0.6961, - "step": 69400 - }, - { - "epoch": 0.61360702982726, - "grad_norm": 2.4533190727233887, - "learning_rate": 3.977321616954567e-05, - "loss": 0.7167, - "step": 69410 - }, - { - "epoch": 0.6136954330875722, - "grad_norm": 6.268654823303223, - "learning_rate": 3.97717427818738e-05, - "loss": 0.7098, - "step": 69420 - }, - { - "epoch": 0.6137838363478845, - "grad_norm": 2.5260400772094727, - "learning_rate": 3.977026939420193e-05, - "loss": 0.8052, - "step": 69430 - }, - { - "epoch": 0.6138722396081967, - "grad_norm": 2.6990771293640137, - "learning_rate": 3.9768796006530055e-05, - "loss": 0.8255, - "step": 69440 - }, - { - "epoch": 0.613960642868509, - "grad_norm": 2.0879828929901123, - "learning_rate": 3.976732261885818e-05, - "loss": 0.6054, - "step": 69450 - }, - { - "epoch": 0.6140490461288213, - "grad_norm": 2.028175115585327, - "learning_rate": 3.976584923118631e-05, - "loss": 0.6453, - "step": 69460 - }, - { - "epoch": 0.6141374493891335, - "grad_norm": 7.0612359046936035, - "learning_rate": 3.976437584351445e-05, - "loss": 0.5713, - "step": 69470 - }, - { - "epoch": 0.6142258526494457, - "grad_norm": 1.8073804378509521, - "learning_rate": 3.976290245584257e-05, - "loss": 0.6989, - "step": 69480 - }, - { - "epoch": 0.614314255909758, - "grad_norm": 7.351374626159668, - "learning_rate": 3.9761429068170704e-05, - "loss": 0.8372, - "step": 69490 - }, - { - "epoch": 0.6144026591700702, - "grad_norm": 1.6930521726608276, - "learning_rate": 3.975995568049883e-05, - "loss": 0.7104, - "step": 69500 - }, - { - "epoch": 0.6144910624303824, - "grad_norm": 2.022446632385254, - "learning_rate": 3.975848229282696e-05, - "loss": 0.6713, - "step": 69510 - }, - { - "epoch": 0.6145794656906947, - "grad_norm": 3.321983814239502, - "learning_rate": 3.975700890515509e-05, - "loss": 0.6751, - "step": 69520 - }, - { - "epoch": 0.6146678689510069, - "grad_norm": 6.4746012687683105, - "learning_rate": 3.9755535517483224e-05, - "loss": 0.6448, - "step": 69530 - }, - { - "epoch": 0.6147562722113191, - "grad_norm": 2.5130515098571777, - "learning_rate": 3.9754062129811345e-05, - "loss": 0.6415, - "step": 69540 - }, - { - "epoch": 0.6148446754716314, - "grad_norm": 5.569534778594971, - "learning_rate": 3.975258874213948e-05, - "loss": 0.7955, - "step": 69550 - }, - { - "epoch": 0.6149330787319436, - "grad_norm": 1.5254712104797363, - "learning_rate": 3.975111535446761e-05, - "loss": 0.6602, - "step": 69560 - }, - { - "epoch": 0.6150214819922559, - "grad_norm": 3.2865262031555176, - "learning_rate": 3.974964196679574e-05, - "loss": 0.7502, - "step": 69570 - }, - { - "epoch": 0.6151098852525682, - "grad_norm": 3.7028069496154785, - "learning_rate": 3.9748168579123866e-05, - "loss": 0.7265, - "step": 69580 - }, - { - "epoch": 0.6151982885128804, - "grad_norm": 2.4027321338653564, - "learning_rate": 3.9746695191451994e-05, - "loss": 0.6391, - "step": 69590 - }, - { - "epoch": 0.6152866917731926, - "grad_norm": 7.726175308227539, - "learning_rate": 3.974522180378012e-05, - "loss": 0.6589, - "step": 69600 - }, - { - "epoch": 0.6153750950335048, - "grad_norm": 1.7029047012329102, - "learning_rate": 3.974374841610826e-05, - "loss": 0.6806, - "step": 69610 - }, - { - "epoch": 0.6154634982938171, - "grad_norm": 1.4652581214904785, - "learning_rate": 3.974227502843638e-05, - "loss": 0.6115, - "step": 69620 - }, - { - "epoch": 0.6155519015541293, - "grad_norm": 1.5396239757537842, - "learning_rate": 3.9740801640764514e-05, - "loss": 0.8202, - "step": 69630 - }, - { - "epoch": 0.6156403048144415, - "grad_norm": 3.345118999481201, - "learning_rate": 3.973932825309264e-05, - "loss": 0.5949, - "step": 69640 - }, - { - "epoch": 0.6157287080747538, - "grad_norm": 4.660312175750732, - "learning_rate": 3.973785486542077e-05, - "loss": 0.777, - "step": 69650 - }, - { - "epoch": 0.615817111335066, - "grad_norm": 2.9668712615966797, - "learning_rate": 3.97363814777489e-05, - "loss": 0.6064, - "step": 69660 - }, - { - "epoch": 0.6159055145953782, - "grad_norm": 1.708382248878479, - "learning_rate": 3.9734908090077034e-05, - "loss": 0.604, - "step": 69670 - }, - { - "epoch": 0.6159939178556905, - "grad_norm": 6.197994232177734, - "learning_rate": 3.9733434702405156e-05, - "loss": 0.6515, - "step": 69680 - }, - { - "epoch": 0.6160823211160028, - "grad_norm": 1.832005262374878, - "learning_rate": 3.973196131473329e-05, - "loss": 0.6481, - "step": 69690 - }, - { - "epoch": 0.616170724376315, - "grad_norm": 6.75554895401001, - "learning_rate": 3.973048792706142e-05, - "loss": 0.7163, - "step": 69700 - }, - { - "epoch": 0.6162591276366273, - "grad_norm": 4.5858235359191895, - "learning_rate": 3.972901453938955e-05, - "loss": 0.7289, - "step": 69710 - }, - { - "epoch": 0.6163475308969395, - "grad_norm": 1.3326013088226318, - "learning_rate": 3.9727541151717676e-05, - "loss": 0.7728, - "step": 69720 - }, - { - "epoch": 0.6164359341572517, - "grad_norm": 9.440103530883789, - "learning_rate": 3.9726067764045804e-05, - "loss": 0.605, - "step": 69730 - }, - { - "epoch": 0.616524337417564, - "grad_norm": 2.7616615295410156, - "learning_rate": 3.972459437637393e-05, - "loss": 0.6349, - "step": 69740 - }, - { - "epoch": 0.6166127406778762, - "grad_norm": 4.382421016693115, - "learning_rate": 3.972312098870207e-05, - "loss": 0.6094, - "step": 69750 - }, - { - "epoch": 0.6167011439381884, - "grad_norm": 3.0365304946899414, - "learning_rate": 3.9721647601030196e-05, - "loss": 0.674, - "step": 69760 - }, - { - "epoch": 0.6167895471985007, - "grad_norm": 2.782837390899658, - "learning_rate": 3.9720174213358325e-05, - "loss": 0.7295, - "step": 69770 - }, - { - "epoch": 0.6168779504588129, - "grad_norm": 2.722450017929077, - "learning_rate": 3.971870082568645e-05, - "loss": 0.5833, - "step": 69780 - }, - { - "epoch": 0.6169663537191251, - "grad_norm": 6.575571537017822, - "learning_rate": 3.971722743801458e-05, - "loss": 0.7132, - "step": 69790 - }, - { - "epoch": 0.6170547569794373, - "grad_norm": 1.6678440570831299, - "learning_rate": 3.971575405034271e-05, - "loss": 0.7755, - "step": 69800 - }, - { - "epoch": 0.6171431602397497, - "grad_norm": 6.708428382873535, - "learning_rate": 3.971428066267084e-05, - "loss": 0.7186, - "step": 69810 - }, - { - "epoch": 0.6172315635000619, - "grad_norm": 3.0498299598693848, - "learning_rate": 3.971280727499897e-05, - "loss": 0.6556, - "step": 69820 - }, - { - "epoch": 0.6173199667603742, - "grad_norm": 2.082214832305908, - "learning_rate": 3.97113338873271e-05, - "loss": 0.703, - "step": 69830 - }, - { - "epoch": 0.6174083700206864, - "grad_norm": 1.7338827848434448, - "learning_rate": 3.970986049965523e-05, - "loss": 0.7961, - "step": 69840 - }, - { - "epoch": 0.6174967732809986, - "grad_norm": 2.6805312633514404, - "learning_rate": 3.970838711198336e-05, - "loss": 0.6546, - "step": 69850 - }, - { - "epoch": 0.6175851765413108, - "grad_norm": 4.324188709259033, - "learning_rate": 3.9706913724311487e-05, - "loss": 0.5934, - "step": 69860 - }, - { - "epoch": 0.6176735798016231, - "grad_norm": 4.739712238311768, - "learning_rate": 3.9705440336639615e-05, - "loss": 0.5926, - "step": 69870 - }, - { - "epoch": 0.6177619830619353, - "grad_norm": 5.206289291381836, - "learning_rate": 3.970396694896775e-05, - "loss": 0.7831, - "step": 69880 - }, - { - "epoch": 0.6178503863222475, - "grad_norm": 5.760246276855469, - "learning_rate": 3.970249356129588e-05, - "loss": 0.7158, - "step": 69890 - }, - { - "epoch": 0.6179387895825598, - "grad_norm": 3.2113797664642334, - "learning_rate": 3.970102017362401e-05, - "loss": 0.7189, - "step": 69900 - }, - { - "epoch": 0.618027192842872, - "grad_norm": 17.097204208374023, - "learning_rate": 3.9699546785952135e-05, - "loss": 0.6945, - "step": 69910 - }, - { - "epoch": 0.6181155961031843, - "grad_norm": 1.966998815536499, - "learning_rate": 3.9698073398280263e-05, - "loss": 0.7506, - "step": 69920 - }, - { - "epoch": 0.6182039993634966, - "grad_norm": 5.558865070343018, - "learning_rate": 3.969660001060839e-05, - "loss": 0.7423, - "step": 69930 - }, - { - "epoch": 0.6182924026238088, - "grad_norm": 3.5467731952667236, - "learning_rate": 3.969512662293653e-05, - "loss": 0.6454, - "step": 69940 - }, - { - "epoch": 0.618380805884121, - "grad_norm": 2.814469337463379, - "learning_rate": 3.969365323526465e-05, - "loss": 0.6733, - "step": 69950 - }, - { - "epoch": 0.6184692091444333, - "grad_norm": 5.449344635009766, - "learning_rate": 3.9692179847592784e-05, - "loss": 0.7544, - "step": 69960 - }, - { - "epoch": 0.6185576124047455, - "grad_norm": 2.5994772911071777, - "learning_rate": 3.969070645992091e-05, - "loss": 0.6585, - "step": 69970 - }, - { - "epoch": 0.6186460156650577, - "grad_norm": 4.481049537658691, - "learning_rate": 3.968923307224904e-05, - "loss": 0.662, - "step": 69980 - }, - { - "epoch": 0.61873441892537, - "grad_norm": 3.5467121601104736, - "learning_rate": 3.968775968457717e-05, - "loss": 0.753, - "step": 69990 - }, - { - "epoch": 0.6188228221856822, - "grad_norm": 9.668065071105957, - "learning_rate": 3.9686286296905304e-05, - "loss": 0.5426, - "step": 70000 - }, - { - "epoch": 0.6189112254459944, - "grad_norm": 5.743401527404785, - "learning_rate": 3.9684812909233425e-05, - "loss": 0.6715, - "step": 70010 - }, - { - "epoch": 0.6189996287063066, - "grad_norm": 3.2080140113830566, - "learning_rate": 3.968333952156156e-05, - "loss": 0.6367, - "step": 70020 - }, - { - "epoch": 0.6190880319666189, - "grad_norm": 6.793011665344238, - "learning_rate": 3.968186613388969e-05, - "loss": 0.725, - "step": 70030 - }, - { - "epoch": 0.6191764352269312, - "grad_norm": 2.914573907852173, - "learning_rate": 3.968039274621782e-05, - "loss": 0.5949, - "step": 70040 - }, - { - "epoch": 0.6192648384872435, - "grad_norm": 7.469156742095947, - "learning_rate": 3.9678919358545946e-05, - "loss": 0.6438, - "step": 70050 - }, - { - "epoch": 0.6193532417475557, - "grad_norm": 2.9054205417633057, - "learning_rate": 3.9677445970874074e-05, - "loss": 0.6425, - "step": 70060 - }, - { - "epoch": 0.6194416450078679, - "grad_norm": 3.513422727584839, - "learning_rate": 3.96759725832022e-05, - "loss": 0.7361, - "step": 70070 - }, - { - "epoch": 0.6195300482681801, - "grad_norm": 2.3616559505462646, - "learning_rate": 3.967449919553034e-05, - "loss": 0.6299, - "step": 70080 - }, - { - "epoch": 0.6196184515284924, - "grad_norm": 3.658355712890625, - "learning_rate": 3.967302580785846e-05, - "loss": 0.6401, - "step": 70090 - }, - { - "epoch": 0.6197068547888046, - "grad_norm": 4.6410322189331055, - "learning_rate": 3.9671552420186594e-05, - "loss": 0.7179, - "step": 70100 - }, - { - "epoch": 0.6197952580491168, - "grad_norm": 5.003321170806885, - "learning_rate": 3.967007903251472e-05, - "loss": 0.7072, - "step": 70110 - }, - { - "epoch": 0.6198836613094291, - "grad_norm": 7.007421970367432, - "learning_rate": 3.966860564484285e-05, - "loss": 0.684, - "step": 70120 - }, - { - "epoch": 0.6199720645697413, - "grad_norm": 1.5463006496429443, - "learning_rate": 3.966713225717098e-05, - "loss": 0.5084, - "step": 70130 - }, - { - "epoch": 0.6200604678300535, - "grad_norm": 3.663630723953247, - "learning_rate": 3.9665658869499114e-05, - "loss": 0.5766, - "step": 70140 - }, - { - "epoch": 0.6201488710903658, - "grad_norm": 7.871727466583252, - "learning_rate": 3.9664185481827236e-05, - "loss": 0.7734, - "step": 70150 - }, - { - "epoch": 0.6202372743506781, - "grad_norm": 3.426593542098999, - "learning_rate": 3.966271209415537e-05, - "loss": 0.6635, - "step": 70160 - }, - { - "epoch": 0.6203256776109903, - "grad_norm": 3.6056861877441406, - "learning_rate": 3.966123870648349e-05, - "loss": 0.737, - "step": 70170 - }, - { - "epoch": 0.6204140808713026, - "grad_norm": 8.423047065734863, - "learning_rate": 3.965976531881163e-05, - "loss": 0.767, - "step": 70180 - }, - { - "epoch": 0.6205024841316148, - "grad_norm": 3.6703591346740723, - "learning_rate": 3.9658291931139756e-05, - "loss": 0.8508, - "step": 70190 - }, - { - "epoch": 0.620590887391927, - "grad_norm": 3.2734997272491455, - "learning_rate": 3.9656818543467884e-05, - "loss": 0.6617, - "step": 70200 - }, - { - "epoch": 0.6206792906522393, - "grad_norm": 1.8895032405853271, - "learning_rate": 3.965534515579601e-05, - "loss": 0.7108, - "step": 70210 - }, - { - "epoch": 0.6207676939125515, - "grad_norm": 2.559908390045166, - "learning_rate": 3.965387176812415e-05, - "loss": 0.6649, - "step": 70220 - }, - { - "epoch": 0.6208560971728637, - "grad_norm": 2.5558080673217773, - "learning_rate": 3.965239838045227e-05, - "loss": 0.6888, - "step": 70230 - }, - { - "epoch": 0.620944500433176, - "grad_norm": 2.5347774028778076, - "learning_rate": 3.9650924992780405e-05, - "loss": 0.57, - "step": 70240 - }, - { - "epoch": 0.6210329036934882, - "grad_norm": 2.93037748336792, - "learning_rate": 3.964945160510853e-05, - "loss": 0.7308, - "step": 70250 - }, - { - "epoch": 0.6211213069538004, - "grad_norm": 1.7718629837036133, - "learning_rate": 3.964797821743666e-05, - "loss": 0.6658, - "step": 70260 - }, - { - "epoch": 0.6212097102141126, - "grad_norm": 3.6500391960144043, - "learning_rate": 3.964650482976479e-05, - "loss": 0.6599, - "step": 70270 - }, - { - "epoch": 0.621298113474425, - "grad_norm": 8.794549942016602, - "learning_rate": 3.964503144209292e-05, - "loss": 0.6497, - "step": 70280 - }, - { - "epoch": 0.6213865167347372, - "grad_norm": 1.5417425632476807, - "learning_rate": 3.9643558054421046e-05, - "loss": 0.6085, - "step": 70290 - }, - { - "epoch": 0.6214749199950494, - "grad_norm": 3.684896230697632, - "learning_rate": 3.964208466674918e-05, - "loss": 0.6309, - "step": 70300 - }, - { - "epoch": 0.6215633232553617, - "grad_norm": 9.374857902526855, - "learning_rate": 3.96406112790773e-05, - "loss": 0.7555, - "step": 70310 - }, - { - "epoch": 0.6216517265156739, - "grad_norm": 4.701685428619385, - "learning_rate": 3.963913789140544e-05, - "loss": 0.6665, - "step": 70320 - }, - { - "epoch": 0.6217401297759861, - "grad_norm": 2.6833534240722656, - "learning_rate": 3.9637664503733567e-05, - "loss": 0.7492, - "step": 70330 - }, - { - "epoch": 0.6218285330362984, - "grad_norm": 2.8093228340148926, - "learning_rate": 3.9636191116061695e-05, - "loss": 0.7487, - "step": 70340 - }, - { - "epoch": 0.6219169362966106, - "grad_norm": 2.0366687774658203, - "learning_rate": 3.963471772838982e-05, - "loss": 0.6758, - "step": 70350 - }, - { - "epoch": 0.6220053395569228, - "grad_norm": 6.7705230712890625, - "learning_rate": 3.963324434071796e-05, - "loss": 0.6338, - "step": 70360 - }, - { - "epoch": 0.6220937428172351, - "grad_norm": 5.320030212402344, - "learning_rate": 3.963177095304608e-05, - "loss": 0.6145, - "step": 70370 - }, - { - "epoch": 0.6221821460775473, - "grad_norm": 1.349388837814331, - "learning_rate": 3.9630297565374215e-05, - "loss": 0.5986, - "step": 70380 - }, - { - "epoch": 0.6222705493378596, - "grad_norm": 3.4945578575134277, - "learning_rate": 3.962882417770234e-05, - "loss": 0.6608, - "step": 70390 - }, - { - "epoch": 0.6223589525981719, - "grad_norm": 7.115966320037842, - "learning_rate": 3.962735079003047e-05, - "loss": 0.7617, - "step": 70400 - }, - { - "epoch": 0.6224473558584841, - "grad_norm": 3.625720500946045, - "learning_rate": 3.96258774023586e-05, - "loss": 0.696, - "step": 70410 - }, - { - "epoch": 0.6225357591187963, - "grad_norm": 4.838071346282959, - "learning_rate": 3.962440401468673e-05, - "loss": 0.742, - "step": 70420 - }, - { - "epoch": 0.6226241623791086, - "grad_norm": 1.1251450777053833, - "learning_rate": 3.962293062701486e-05, - "loss": 0.6648, - "step": 70430 - }, - { - "epoch": 0.6227125656394208, - "grad_norm": 5.156866073608398, - "learning_rate": 3.962145723934299e-05, - "loss": 0.6456, - "step": 70440 - }, - { - "epoch": 0.622800968899733, - "grad_norm": 2.8390755653381348, - "learning_rate": 3.9619983851671114e-05, - "loss": 0.704, - "step": 70450 - }, - { - "epoch": 0.6228893721600453, - "grad_norm": 2.190711498260498, - "learning_rate": 3.961851046399925e-05, - "loss": 0.7537, - "step": 70460 - }, - { - "epoch": 0.6229777754203575, - "grad_norm": 11.280312538146973, - "learning_rate": 3.961703707632738e-05, - "loss": 0.691, - "step": 70470 - }, - { - "epoch": 0.6230661786806697, - "grad_norm": 1.746973991394043, - "learning_rate": 3.9615563688655505e-05, - "loss": 0.7353, - "step": 70480 - }, - { - "epoch": 0.6231545819409819, - "grad_norm": 3.1988108158111572, - "learning_rate": 3.9614090300983634e-05, - "loss": 0.613, - "step": 70490 - }, - { - "epoch": 0.6232429852012942, - "grad_norm": 2.930650234222412, - "learning_rate": 3.961261691331177e-05, - "loss": 0.7425, - "step": 70500 - }, - { - "epoch": 0.6233313884616065, - "grad_norm": 3.3266892433166504, - "learning_rate": 3.961114352563989e-05, - "loss": 0.6717, - "step": 70510 - }, - { - "epoch": 0.6234197917219187, - "grad_norm": 1.8725578784942627, - "learning_rate": 3.9609670137968026e-05, - "loss": 0.6432, - "step": 70520 - }, - { - "epoch": 0.623508194982231, - "grad_norm": 5.384922027587891, - "learning_rate": 3.960819675029615e-05, - "loss": 0.9552, - "step": 70530 - }, - { - "epoch": 0.6235965982425432, - "grad_norm": 2.1145195960998535, - "learning_rate": 3.960672336262428e-05, - "loss": 0.6509, - "step": 70540 - }, - { - "epoch": 0.6236850015028554, - "grad_norm": 2.2755064964294434, - "learning_rate": 3.960524997495241e-05, - "loss": 0.7932, - "step": 70550 - }, - { - "epoch": 0.6237734047631677, - "grad_norm": 1.5214645862579346, - "learning_rate": 3.960377658728054e-05, - "loss": 0.7171, - "step": 70560 - }, - { - "epoch": 0.6238618080234799, - "grad_norm": 2.2720513343811035, - "learning_rate": 3.960230319960867e-05, - "loss": 0.7061, - "step": 70570 - }, - { - "epoch": 0.6239502112837921, - "grad_norm": 1.4581434726715088, - "learning_rate": 3.96008298119368e-05, - "loss": 0.6505, - "step": 70580 - }, - { - "epoch": 0.6240386145441044, - "grad_norm": 5.223835468292236, - "learning_rate": 3.9599356424264924e-05, - "loss": 0.6278, - "step": 70590 - }, - { - "epoch": 0.6241270178044166, - "grad_norm": 6.953359127044678, - "learning_rate": 3.959788303659306e-05, - "loss": 0.7659, - "step": 70600 - }, - { - "epoch": 0.6242154210647288, - "grad_norm": 4.40071439743042, - "learning_rate": 3.959640964892119e-05, - "loss": 0.5564, - "step": 70610 - }, - { - "epoch": 0.6243038243250411, - "grad_norm": 4.113469123840332, - "learning_rate": 3.9594936261249316e-05, - "loss": 0.7422, - "step": 70620 - }, - { - "epoch": 0.6243922275853534, - "grad_norm": 10.757296562194824, - "learning_rate": 3.9593462873577444e-05, - "loss": 0.664, - "step": 70630 - }, - { - "epoch": 0.6244806308456656, - "grad_norm": 1.3857872486114502, - "learning_rate": 3.959198948590557e-05, - "loss": 0.5617, - "step": 70640 - }, - { - "epoch": 0.6245690341059779, - "grad_norm": 12.130651473999023, - "learning_rate": 3.95905160982337e-05, - "loss": 0.5965, - "step": 70650 - }, - { - "epoch": 0.6246574373662901, - "grad_norm": 1.5708321332931519, - "learning_rate": 3.9589042710561836e-05, - "loss": 0.4673, - "step": 70660 - }, - { - "epoch": 0.6247458406266023, - "grad_norm": 1.8471735715866089, - "learning_rate": 3.9587569322889965e-05, - "loss": 0.6611, - "step": 70670 - }, - { - "epoch": 0.6248342438869146, - "grad_norm": 5.520918846130371, - "learning_rate": 3.958609593521809e-05, - "loss": 0.6115, - "step": 70680 - }, - { - "epoch": 0.6249226471472268, - "grad_norm": 5.764188766479492, - "learning_rate": 3.958462254754622e-05, - "loss": 0.7145, - "step": 70690 - }, - { - "epoch": 0.625011050407539, - "grad_norm": 3.201566219329834, - "learning_rate": 3.958314915987435e-05, - "loss": 0.6039, - "step": 70700 - }, - { - "epoch": 0.6250994536678512, - "grad_norm": 1.7584728002548218, - "learning_rate": 3.958167577220248e-05, - "loss": 0.7577, - "step": 70710 - }, - { - "epoch": 0.6251878569281635, - "grad_norm": 2.0485239028930664, - "learning_rate": 3.958020238453061e-05, - "loss": 0.8059, - "step": 70720 - }, - { - "epoch": 0.6252762601884757, - "grad_norm": 1.824622392654419, - "learning_rate": 3.957872899685874e-05, - "loss": 0.6871, - "step": 70730 - }, - { - "epoch": 0.6253646634487879, - "grad_norm": 7.051321506500244, - "learning_rate": 3.957725560918687e-05, - "loss": 0.6122, - "step": 70740 - }, - { - "epoch": 0.6254530667091003, - "grad_norm": 3.967787027359009, - "learning_rate": 3.9575782221515e-05, - "loss": 0.6527, - "step": 70750 - }, - { - "epoch": 0.6255414699694125, - "grad_norm": 1.4101231098175049, - "learning_rate": 3.9574308833843126e-05, - "loss": 0.6156, - "step": 70760 - }, - { - "epoch": 0.6256298732297247, - "grad_norm": 4.428545951843262, - "learning_rate": 3.9572835446171255e-05, - "loss": 0.6416, - "step": 70770 - }, - { - "epoch": 0.625718276490037, - "grad_norm": 5.919891834259033, - "learning_rate": 3.957136205849938e-05, - "loss": 0.7208, - "step": 70780 - }, - { - "epoch": 0.6258066797503492, - "grad_norm": 14.246026039123535, - "learning_rate": 3.956988867082752e-05, - "loss": 0.7137, - "step": 70790 - }, - { - "epoch": 0.6258950830106614, - "grad_norm": 2.3636012077331543, - "learning_rate": 3.956841528315565e-05, - "loss": 0.6627, - "step": 70800 - }, - { - "epoch": 0.6259834862709737, - "grad_norm": 5.053730487823486, - "learning_rate": 3.9566941895483775e-05, - "loss": 0.6867, - "step": 70810 - }, - { - "epoch": 0.6260718895312859, - "grad_norm": 4.1448540687561035, - "learning_rate": 3.95654685078119e-05, - "loss": 0.769, - "step": 70820 - }, - { - "epoch": 0.6261602927915981, - "grad_norm": 1.0152547359466553, - "learning_rate": 3.956399512014003e-05, - "loss": 0.5611, - "step": 70830 - }, - { - "epoch": 0.6262486960519104, - "grad_norm": 3.6613223552703857, - "learning_rate": 3.956252173246816e-05, - "loss": 0.6594, - "step": 70840 - }, - { - "epoch": 0.6263370993122226, - "grad_norm": 1.9071577787399292, - "learning_rate": 3.9561048344796295e-05, - "loss": 0.6842, - "step": 70850 - }, - { - "epoch": 0.6264255025725348, - "grad_norm": 11.972447395324707, - "learning_rate": 3.9559574957124424e-05, - "loss": 0.5839, - "step": 70860 - }, - { - "epoch": 0.6265139058328472, - "grad_norm": 3.9560227394104004, - "learning_rate": 3.955810156945255e-05, - "loss": 0.6597, - "step": 70870 - }, - { - "epoch": 0.6266023090931594, - "grad_norm": 3.1316449642181396, - "learning_rate": 3.955662818178068e-05, - "loss": 0.7728, - "step": 70880 - }, - { - "epoch": 0.6266907123534716, - "grad_norm": 7.84269380569458, - "learning_rate": 3.955515479410881e-05, - "loss": 0.6914, - "step": 70890 - }, - { - "epoch": 0.6267791156137839, - "grad_norm": 3.6713805198669434, - "learning_rate": 3.955368140643694e-05, - "loss": 0.6, - "step": 70900 - }, - { - "epoch": 0.6268675188740961, - "grad_norm": 7.718493938446045, - "learning_rate": 3.955220801876507e-05, - "loss": 0.7166, - "step": 70910 - }, - { - "epoch": 0.6269559221344083, - "grad_norm": 6.260168552398682, - "learning_rate": 3.9550734631093194e-05, - "loss": 0.5618, - "step": 70920 - }, - { - "epoch": 0.6270443253947205, - "grad_norm": 2.636608123779297, - "learning_rate": 3.954926124342133e-05, - "loss": 0.6655, - "step": 70930 - }, - { - "epoch": 0.6271327286550328, - "grad_norm": 4.675978660583496, - "learning_rate": 3.954778785574946e-05, - "loss": 0.6802, - "step": 70940 - }, - { - "epoch": 0.627221131915345, - "grad_norm": 1.7388070821762085, - "learning_rate": 3.9546314468077586e-05, - "loss": 0.7387, - "step": 70950 - }, - { - "epoch": 0.6273095351756572, - "grad_norm": 2.636258125305176, - "learning_rate": 3.9544841080405714e-05, - "loss": 0.6117, - "step": 70960 - }, - { - "epoch": 0.6273979384359695, - "grad_norm": 3.331146240234375, - "learning_rate": 3.954336769273385e-05, - "loss": 0.78, - "step": 70970 - }, - { - "epoch": 0.6274863416962818, - "grad_norm": 3.7574076652526855, - "learning_rate": 3.954189430506197e-05, - "loss": 0.7342, - "step": 70980 - }, - { - "epoch": 0.627574744956594, - "grad_norm": 1.7998695373535156, - "learning_rate": 3.9540420917390106e-05, - "loss": 0.6975, - "step": 70990 - }, - { - "epoch": 0.6276631482169063, - "grad_norm": 3.9677886962890625, - "learning_rate": 3.953894752971823e-05, - "loss": 0.8364, - "step": 71000 - }, - { - "epoch": 0.6277515514772185, - "grad_norm": 2.983008861541748, - "learning_rate": 3.953747414204636e-05, - "loss": 0.6461, - "step": 71010 - }, - { - "epoch": 0.6278399547375307, - "grad_norm": 1.4710510969161987, - "learning_rate": 3.953600075437449e-05, - "loss": 0.6559, - "step": 71020 - }, - { - "epoch": 0.627928357997843, - "grad_norm": 1.6924302577972412, - "learning_rate": 3.953452736670262e-05, - "loss": 0.6297, - "step": 71030 - }, - { - "epoch": 0.6280167612581552, - "grad_norm": 1.9899982213974, - "learning_rate": 3.953305397903075e-05, - "loss": 0.6832, - "step": 71040 - }, - { - "epoch": 0.6281051645184674, - "grad_norm": 1.574859619140625, - "learning_rate": 3.953158059135888e-05, - "loss": 0.5918, - "step": 71050 - }, - { - "epoch": 0.6281935677787797, - "grad_norm": 6.533076286315918, - "learning_rate": 3.9530107203687004e-05, - "loss": 0.7408, - "step": 71060 - }, - { - "epoch": 0.6282819710390919, - "grad_norm": 7.978692531585693, - "learning_rate": 3.952863381601514e-05, - "loss": 0.5992, - "step": 71070 - }, - { - "epoch": 0.6283703742994041, - "grad_norm": 8.623467445373535, - "learning_rate": 3.952716042834327e-05, - "loss": 0.5835, - "step": 71080 - }, - { - "epoch": 0.6284587775597164, - "grad_norm": 7.112682819366455, - "learning_rate": 3.9525687040671396e-05, - "loss": 0.6871, - "step": 71090 - }, - { - "epoch": 0.6285471808200287, - "grad_norm": 8.423845291137695, - "learning_rate": 3.9524213652999524e-05, - "loss": 0.6075, - "step": 71100 - }, - { - "epoch": 0.6286355840803409, - "grad_norm": 1.8750859498977661, - "learning_rate": 3.952274026532765e-05, - "loss": 0.821, - "step": 71110 - }, - { - "epoch": 0.6287239873406532, - "grad_norm": 7.829469203948975, - "learning_rate": 3.952126687765578e-05, - "loss": 0.6275, - "step": 71120 - }, - { - "epoch": 0.6288123906009654, - "grad_norm": 2.229738473892212, - "learning_rate": 3.9519793489983916e-05, - "loss": 0.7512, - "step": 71130 - }, - { - "epoch": 0.6289007938612776, - "grad_norm": 3.0698466300964355, - "learning_rate": 3.951832010231204e-05, - "loss": 0.665, - "step": 71140 - }, - { - "epoch": 0.6289891971215898, - "grad_norm": 9.702445030212402, - "learning_rate": 3.951684671464017e-05, - "loss": 0.7262, - "step": 71150 - }, - { - "epoch": 0.6290776003819021, - "grad_norm": 5.001696586608887, - "learning_rate": 3.95153733269683e-05, - "loss": 0.7692, - "step": 71160 - }, - { - "epoch": 0.6291660036422143, - "grad_norm": 5.729146480560303, - "learning_rate": 3.951389993929643e-05, - "loss": 0.7092, - "step": 71170 - }, - { - "epoch": 0.6292544069025265, - "grad_norm": 2.534942150115967, - "learning_rate": 3.951242655162456e-05, - "loss": 0.5682, - "step": 71180 - }, - { - "epoch": 0.6293428101628388, - "grad_norm": 2.6672048568725586, - "learning_rate": 3.951095316395269e-05, - "loss": 0.6661, - "step": 71190 - }, - { - "epoch": 0.629431213423151, - "grad_norm": 5.888321876525879, - "learning_rate": 3.9509479776280815e-05, - "loss": 0.6581, - "step": 71200 - }, - { - "epoch": 0.6295196166834632, - "grad_norm": 4.407350540161133, - "learning_rate": 3.950800638860895e-05, - "loss": 0.6108, - "step": 71210 - }, - { - "epoch": 0.6296080199437756, - "grad_norm": 3.452178478240967, - "learning_rate": 3.950653300093707e-05, - "loss": 0.6308, - "step": 71220 - }, - { - "epoch": 0.6296964232040878, - "grad_norm": 1.931299090385437, - "learning_rate": 3.9505059613265207e-05, - "loss": 0.6578, - "step": 71230 - }, - { - "epoch": 0.6297848264644, - "grad_norm": 2.254317045211792, - "learning_rate": 3.9503586225593335e-05, - "loss": 0.7416, - "step": 71240 - }, - { - "epoch": 0.6298732297247123, - "grad_norm": 1.2741414308547974, - "learning_rate": 3.950211283792146e-05, - "loss": 0.7595, - "step": 71250 - }, - { - "epoch": 0.6299616329850245, - "grad_norm": 4.544749736785889, - "learning_rate": 3.950063945024959e-05, - "loss": 0.5759, - "step": 71260 - }, - { - "epoch": 0.6300500362453367, - "grad_norm": 2.699962854385376, - "learning_rate": 3.949916606257773e-05, - "loss": 0.7419, - "step": 71270 - }, - { - "epoch": 0.630138439505649, - "grad_norm": 5.524005889892578, - "learning_rate": 3.949769267490585e-05, - "loss": 0.6595, - "step": 71280 - }, - { - "epoch": 0.6302268427659612, - "grad_norm": 2.926417589187622, - "learning_rate": 3.9496219287233983e-05, - "loss": 0.6159, - "step": 71290 - }, - { - "epoch": 0.6303152460262734, - "grad_norm": 0.8104326725006104, - "learning_rate": 3.949474589956211e-05, - "loss": 0.5544, - "step": 71300 - }, - { - "epoch": 0.6304036492865857, - "grad_norm": 21.157617568969727, - "learning_rate": 3.949327251189024e-05, - "loss": 0.6606, - "step": 71310 - }, - { - "epoch": 0.6304920525468979, - "grad_norm": 4.392067909240723, - "learning_rate": 3.949179912421837e-05, - "loss": 0.731, - "step": 71320 - }, - { - "epoch": 0.6305804558072101, - "grad_norm": 11.362103462219238, - "learning_rate": 3.9490325736546504e-05, - "loss": 0.7257, - "step": 71330 - }, - { - "epoch": 0.6306688590675225, - "grad_norm": 6.68247127532959, - "learning_rate": 3.9488852348874625e-05, - "loss": 0.592, - "step": 71340 - }, - { - "epoch": 0.6307572623278347, - "grad_norm": 13.463199615478516, - "learning_rate": 3.948737896120276e-05, - "loss": 0.6858, - "step": 71350 - }, - { - "epoch": 0.6308456655881469, - "grad_norm": 2.395317792892456, - "learning_rate": 3.948590557353088e-05, - "loss": 0.6241, - "step": 71360 - }, - { - "epoch": 0.6309340688484592, - "grad_norm": 5.0932512283325195, - "learning_rate": 3.948443218585902e-05, - "loss": 0.6482, - "step": 71370 - }, - { - "epoch": 0.6310224721087714, - "grad_norm": 2.647167682647705, - "learning_rate": 3.9482958798187145e-05, - "loss": 0.6506, - "step": 71380 - }, - { - "epoch": 0.6311108753690836, - "grad_norm": 6.654621601104736, - "learning_rate": 3.9481485410515274e-05, - "loss": 0.6528, - "step": 71390 - }, - { - "epoch": 0.6311992786293958, - "grad_norm": 8.494424819946289, - "learning_rate": 3.94800120228434e-05, - "loss": 0.6563, - "step": 71400 - }, - { - "epoch": 0.6312876818897081, - "grad_norm": 3.0831310749053955, - "learning_rate": 3.947853863517154e-05, - "loss": 0.7338, - "step": 71410 - }, - { - "epoch": 0.6313760851500203, - "grad_norm": 6.151607513427734, - "learning_rate": 3.947706524749966e-05, - "loss": 0.6282, - "step": 71420 - }, - { - "epoch": 0.6314644884103325, - "grad_norm": 4.34309720993042, - "learning_rate": 3.9475591859827794e-05, - "loss": 0.7178, - "step": 71430 - }, - { - "epoch": 0.6315528916706448, - "grad_norm": 1.1920831203460693, - "learning_rate": 3.947411847215592e-05, - "loss": 0.7459, - "step": 71440 - }, - { - "epoch": 0.6316412949309571, - "grad_norm": 1.8974387645721436, - "learning_rate": 3.947264508448405e-05, - "loss": 0.7442, - "step": 71450 - }, - { - "epoch": 0.6317296981912693, - "grad_norm": 4.991611480712891, - "learning_rate": 3.947117169681218e-05, - "loss": 0.7063, - "step": 71460 - }, - { - "epoch": 0.6318181014515816, - "grad_norm": 12.901622772216797, - "learning_rate": 3.946969830914031e-05, - "loss": 0.7322, - "step": 71470 - }, - { - "epoch": 0.6319065047118938, - "grad_norm": 6.449521064758301, - "learning_rate": 3.9468224921468436e-05, - "loss": 0.5937, - "step": 71480 - }, - { - "epoch": 0.631994907972206, - "grad_norm": 2.001889705657959, - "learning_rate": 3.946675153379657e-05, - "loss": 0.5844, - "step": 71490 - }, - { - "epoch": 0.6320833112325183, - "grad_norm": 9.098803520202637, - "learning_rate": 3.946527814612469e-05, - "loss": 0.6801, - "step": 71500 - }, - { - "epoch": 0.6321717144928305, - "grad_norm": 8.762860298156738, - "learning_rate": 3.946380475845283e-05, - "loss": 0.8125, - "step": 71510 - }, - { - "epoch": 0.6322601177531427, - "grad_norm": 1.3403794765472412, - "learning_rate": 3.9462331370780956e-05, - "loss": 0.6922, - "step": 71520 - }, - { - "epoch": 0.632348521013455, - "grad_norm": 1.946393370628357, - "learning_rate": 3.9460857983109084e-05, - "loss": 0.7752, - "step": 71530 - }, - { - "epoch": 0.6324369242737672, - "grad_norm": 11.131938934326172, - "learning_rate": 3.945938459543721e-05, - "loss": 0.6543, - "step": 71540 - }, - { - "epoch": 0.6325253275340794, - "grad_norm": 8.733319282531738, - "learning_rate": 3.945791120776535e-05, - "loss": 0.5897, - "step": 71550 - }, - { - "epoch": 0.6326137307943916, - "grad_norm": 3.5438811779022217, - "learning_rate": 3.945643782009347e-05, - "loss": 0.6385, - "step": 71560 - }, - { - "epoch": 0.632702134054704, - "grad_norm": 8.746137619018555, - "learning_rate": 3.9454964432421604e-05, - "loss": 0.5731, - "step": 71570 - }, - { - "epoch": 0.6327905373150162, - "grad_norm": 3.85282039642334, - "learning_rate": 3.945349104474973e-05, - "loss": 0.6728, - "step": 71580 - }, - { - "epoch": 0.6328789405753285, - "grad_norm": 1.4692754745483398, - "learning_rate": 3.945201765707786e-05, - "loss": 0.6366, - "step": 71590 - }, - { - "epoch": 0.6329673438356407, - "grad_norm": 2.382704257965088, - "learning_rate": 3.945054426940599e-05, - "loss": 0.6038, - "step": 71600 - }, - { - "epoch": 0.6330557470959529, - "grad_norm": 3.2350873947143555, - "learning_rate": 3.944907088173412e-05, - "loss": 0.7405, - "step": 71610 - }, - { - "epoch": 0.6331441503562651, - "grad_norm": 1.6920549869537354, - "learning_rate": 3.9447597494062246e-05, - "loss": 0.6516, - "step": 71620 - }, - { - "epoch": 0.6332325536165774, - "grad_norm": 3.844879150390625, - "learning_rate": 3.944612410639038e-05, - "loss": 0.6012, - "step": 71630 - }, - { - "epoch": 0.6333209568768896, - "grad_norm": 3.9348480701446533, - "learning_rate": 3.944465071871851e-05, - "loss": 0.7567, - "step": 71640 - }, - { - "epoch": 0.6334093601372018, - "grad_norm": 3.7274162769317627, - "learning_rate": 3.944317733104664e-05, - "loss": 0.6956, - "step": 71650 - }, - { - "epoch": 0.6334977633975141, - "grad_norm": 7.328160762786865, - "learning_rate": 3.9441703943374766e-05, - "loss": 0.6459, - "step": 71660 - }, - { - "epoch": 0.6335861666578263, - "grad_norm": 3.333332061767578, - "learning_rate": 3.9440230555702895e-05, - "loss": 0.5662, - "step": 71670 - }, - { - "epoch": 0.6336745699181385, - "grad_norm": 7.647139072418213, - "learning_rate": 3.943875716803102e-05, - "loss": 0.6997, - "step": 71680 - }, - { - "epoch": 0.6337629731784509, - "grad_norm": 1.4852776527404785, - "learning_rate": 3.943728378035915e-05, - "loss": 0.7116, - "step": 71690 - }, - { - "epoch": 0.6338513764387631, - "grad_norm": 3.6578001976013184, - "learning_rate": 3.943581039268729e-05, - "loss": 0.6353, - "step": 71700 - }, - { - "epoch": 0.6339397796990753, - "grad_norm": 4.410030364990234, - "learning_rate": 3.9434337005015415e-05, - "loss": 0.5709, - "step": 71710 - }, - { - "epoch": 0.6340281829593876, - "grad_norm": 2.205988883972168, - "learning_rate": 3.943286361734354e-05, - "loss": 0.7324, - "step": 71720 - }, - { - "epoch": 0.6341165862196998, - "grad_norm": 3.9057741165161133, - "learning_rate": 3.943139022967167e-05, - "loss": 0.7229, - "step": 71730 - }, - { - "epoch": 0.634204989480012, - "grad_norm": 2.1983866691589355, - "learning_rate": 3.94299168419998e-05, - "loss": 0.6926, - "step": 71740 - }, - { - "epoch": 0.6342933927403243, - "grad_norm": 3.0037119388580322, - "learning_rate": 3.942844345432793e-05, - "loss": 0.63, - "step": 71750 - }, - { - "epoch": 0.6343817960006365, - "grad_norm": 22.46878433227539, - "learning_rate": 3.9426970066656064e-05, - "loss": 0.7218, - "step": 71760 - }, - { - "epoch": 0.6344701992609487, - "grad_norm": 3.7411534786224365, - "learning_rate": 3.942549667898419e-05, - "loss": 0.6539, - "step": 71770 - }, - { - "epoch": 0.634558602521261, - "grad_norm": 2.6895134449005127, - "learning_rate": 3.942402329131232e-05, - "loss": 0.6616, - "step": 71780 - }, - { - "epoch": 0.6346470057815732, - "grad_norm": 1.4705055952072144, - "learning_rate": 3.942254990364045e-05, - "loss": 0.7168, - "step": 71790 - }, - { - "epoch": 0.6347354090418854, - "grad_norm": 2.9868435859680176, - "learning_rate": 3.942107651596858e-05, - "loss": 0.714, - "step": 71800 - }, - { - "epoch": 0.6348238123021978, - "grad_norm": 2.1962103843688965, - "learning_rate": 3.9419603128296705e-05, - "loss": 0.7727, - "step": 71810 - }, - { - "epoch": 0.63491221556251, - "grad_norm": 2.4985291957855225, - "learning_rate": 3.941812974062484e-05, - "loss": 0.7119, - "step": 71820 - }, - { - "epoch": 0.6350006188228222, - "grad_norm": 2.670518398284912, - "learning_rate": 3.941665635295296e-05, - "loss": 0.8273, - "step": 71830 - }, - { - "epoch": 0.6350890220831344, - "grad_norm": 1.5731847286224365, - "learning_rate": 3.94151829652811e-05, - "loss": 0.6485, - "step": 71840 - }, - { - "epoch": 0.6351774253434467, - "grad_norm": 3.0562798976898193, - "learning_rate": 3.9413709577609225e-05, - "loss": 0.618, - "step": 71850 - }, - { - "epoch": 0.6352658286037589, - "grad_norm": 2.1619131565093994, - "learning_rate": 3.9412236189937354e-05, - "loss": 0.7004, - "step": 71860 - }, - { - "epoch": 0.6353542318640711, - "grad_norm": 3.9373903274536133, - "learning_rate": 3.941076280226548e-05, - "loss": 0.4843, - "step": 71870 - }, - { - "epoch": 0.6354426351243834, - "grad_norm": 9.931157112121582, - "learning_rate": 3.940928941459362e-05, - "loss": 0.7468, - "step": 71880 - }, - { - "epoch": 0.6355310383846956, - "grad_norm": 4.420377731323242, - "learning_rate": 3.940781602692174e-05, - "loss": 0.5889, - "step": 71890 - }, - { - "epoch": 0.6356194416450078, - "grad_norm": 2.833367109298706, - "learning_rate": 3.9406342639249874e-05, - "loss": 0.6528, - "step": 71900 - }, - { - "epoch": 0.6357078449053201, - "grad_norm": 33.024906158447266, - "learning_rate": 3.9404869251578e-05, - "loss": 0.6006, - "step": 71910 - }, - { - "epoch": 0.6357962481656323, - "grad_norm": 3.4277443885803223, - "learning_rate": 3.940339586390613e-05, - "loss": 0.7393, - "step": 71920 - }, - { - "epoch": 0.6358846514259446, - "grad_norm": 3.256603717803955, - "learning_rate": 3.940192247623426e-05, - "loss": 0.7508, - "step": 71930 - }, - { - "epoch": 0.6359730546862569, - "grad_norm": 2.869668960571289, - "learning_rate": 3.940044908856239e-05, - "loss": 0.7701, - "step": 71940 - }, - { - "epoch": 0.6360614579465691, - "grad_norm": 1.2516562938690186, - "learning_rate": 3.9398975700890516e-05, - "loss": 0.6374, - "step": 71950 - }, - { - "epoch": 0.6361498612068813, - "grad_norm": 7.036595821380615, - "learning_rate": 3.939750231321865e-05, - "loss": 0.7108, - "step": 71960 - }, - { - "epoch": 0.6362382644671936, - "grad_norm": 0.8625949025154114, - "learning_rate": 3.939602892554677e-05, - "loss": 0.599, - "step": 71970 - }, - { - "epoch": 0.6363266677275058, - "grad_norm": 12.085440635681152, - "learning_rate": 3.939455553787491e-05, - "loss": 0.6241, - "step": 71980 - }, - { - "epoch": 0.636415070987818, - "grad_norm": 2.5847713947296143, - "learning_rate": 3.9393082150203036e-05, - "loss": 0.6443, - "step": 71990 - }, - { - "epoch": 0.6365034742481303, - "grad_norm": 16.710302352905273, - "learning_rate": 3.9391608762531164e-05, - "loss": 0.7294, - "step": 72000 - }, - { - "epoch": 0.6365918775084425, - "grad_norm": 2.764096736907959, - "learning_rate": 3.939013537485929e-05, - "loss": 0.6141, - "step": 72010 - }, - { - "epoch": 0.6366802807687547, - "grad_norm": 12.803329467773438, - "learning_rate": 3.938866198718743e-05, - "loss": 0.6715, - "step": 72020 - }, - { - "epoch": 0.636768684029067, - "grad_norm": 2.818300485610962, - "learning_rate": 3.938718859951555e-05, - "loss": 0.6954, - "step": 72030 - }, - { - "epoch": 0.6368570872893793, - "grad_norm": 11.248153686523438, - "learning_rate": 3.9385715211843685e-05, - "loss": 0.857, - "step": 72040 - }, - { - "epoch": 0.6369454905496915, - "grad_norm": 4.570161819458008, - "learning_rate": 3.9384241824171806e-05, - "loss": 0.669, - "step": 72050 - }, - { - "epoch": 0.6370338938100037, - "grad_norm": 4.431693077087402, - "learning_rate": 3.938276843649994e-05, - "loss": 0.6775, - "step": 72060 - }, - { - "epoch": 0.637122297070316, - "grad_norm": 3.9414236545562744, - "learning_rate": 3.938129504882807e-05, - "loss": 0.733, - "step": 72070 - }, - { - "epoch": 0.6372107003306282, - "grad_norm": 4.171746253967285, - "learning_rate": 3.93798216611562e-05, - "loss": 0.7131, - "step": 72080 - }, - { - "epoch": 0.6372991035909404, - "grad_norm": 2.7596962451934814, - "learning_rate": 3.9378348273484326e-05, - "loss": 0.7805, - "step": 72090 - }, - { - "epoch": 0.6373875068512527, - "grad_norm": 3.1972498893737793, - "learning_rate": 3.937687488581246e-05, - "loss": 0.604, - "step": 72100 - }, - { - "epoch": 0.6374759101115649, - "grad_norm": 1.2993907928466797, - "learning_rate": 3.937540149814058e-05, - "loss": 0.7639, - "step": 72110 - }, - { - "epoch": 0.6375643133718771, - "grad_norm": 3.5008013248443604, - "learning_rate": 3.937392811046872e-05, - "loss": 0.7438, - "step": 72120 - }, - { - "epoch": 0.6376527166321894, - "grad_norm": 2.820345878601074, - "learning_rate": 3.9372454722796846e-05, - "loss": 0.7525, - "step": 72130 - }, - { - "epoch": 0.6377411198925016, - "grad_norm": 3.930330991744995, - "learning_rate": 3.9370981335124975e-05, - "loss": 0.6784, - "step": 72140 - }, - { - "epoch": 0.6378295231528138, - "grad_norm": 2.45150089263916, - "learning_rate": 3.93695079474531e-05, - "loss": 0.7035, - "step": 72150 - }, - { - "epoch": 0.6379179264131262, - "grad_norm": 4.0196852684021, - "learning_rate": 3.936803455978123e-05, - "loss": 0.6605, - "step": 72160 - }, - { - "epoch": 0.6380063296734384, - "grad_norm": 0.7428300380706787, - "learning_rate": 3.936656117210936e-05, - "loss": 0.6295, - "step": 72170 - }, - { - "epoch": 0.6380947329337506, - "grad_norm": 2.0799736976623535, - "learning_rate": 3.9365087784437495e-05, - "loss": 0.7059, - "step": 72180 - }, - { - "epoch": 0.6381831361940629, - "grad_norm": 2.508800745010376, - "learning_rate": 3.936361439676562e-05, - "loss": 0.5304, - "step": 72190 - }, - { - "epoch": 0.6382715394543751, - "grad_norm": 9.046399116516113, - "learning_rate": 3.936214100909375e-05, - "loss": 0.5919, - "step": 72200 - }, - { - "epoch": 0.6383599427146873, - "grad_norm": 5.714117050170898, - "learning_rate": 3.936066762142188e-05, - "loss": 0.7344, - "step": 72210 - }, - { - "epoch": 0.6384483459749996, - "grad_norm": 2.476715326309204, - "learning_rate": 3.935919423375001e-05, - "loss": 0.9094, - "step": 72220 - }, - { - "epoch": 0.6385367492353118, - "grad_norm": 2.1051700115203857, - "learning_rate": 3.935772084607814e-05, - "loss": 0.6232, - "step": 72230 - }, - { - "epoch": 0.638625152495624, - "grad_norm": 5.702798366546631, - "learning_rate": 3.935624745840627e-05, - "loss": 0.5859, - "step": 72240 - }, - { - "epoch": 0.6387135557559362, - "grad_norm": 3.238276243209839, - "learning_rate": 3.9354774070734394e-05, - "loss": 0.7372, - "step": 72250 - }, - { - "epoch": 0.6388019590162485, - "grad_norm": 1.8214185237884521, - "learning_rate": 3.935330068306253e-05, - "loss": 0.5912, - "step": 72260 - }, - { - "epoch": 0.6388903622765607, - "grad_norm": 2.8009698390960693, - "learning_rate": 3.935182729539066e-05, - "loss": 0.7354, - "step": 72270 - }, - { - "epoch": 0.638978765536873, - "grad_norm": 4.564553737640381, - "learning_rate": 3.9350353907718785e-05, - "loss": 0.6423, - "step": 72280 - }, - { - "epoch": 0.6390671687971853, - "grad_norm": 3.0105106830596924, - "learning_rate": 3.9348880520046914e-05, - "loss": 0.8495, - "step": 72290 - }, - { - "epoch": 0.6391555720574975, - "grad_norm": 13.156732559204102, - "learning_rate": 3.934740713237504e-05, - "loss": 0.7583, - "step": 72300 - }, - { - "epoch": 0.6392439753178097, - "grad_norm": 4.095754623413086, - "learning_rate": 3.934593374470317e-05, - "loss": 0.5831, - "step": 72310 - }, - { - "epoch": 0.639332378578122, - "grad_norm": 6.197402477264404, - "learning_rate": 3.9344460357031306e-05, - "loss": 0.6996, - "step": 72320 - }, - { - "epoch": 0.6394207818384342, - "grad_norm": 5.47165584564209, - "learning_rate": 3.934298696935943e-05, - "loss": 0.8331, - "step": 72330 - }, - { - "epoch": 0.6395091850987464, - "grad_norm": 9.038413047790527, - "learning_rate": 3.934151358168756e-05, - "loss": 0.7595, - "step": 72340 - }, - { - "epoch": 0.6395975883590587, - "grad_norm": 1.7033380270004272, - "learning_rate": 3.934004019401569e-05, - "loss": 0.6425, - "step": 72350 - }, - { - "epoch": 0.6396859916193709, - "grad_norm": 2.089405059814453, - "learning_rate": 3.933856680634382e-05, - "loss": 0.539, - "step": 72360 - }, - { - "epoch": 0.6397743948796831, - "grad_norm": 10.20825481414795, - "learning_rate": 3.933709341867195e-05, - "loss": 0.577, - "step": 72370 - }, - { - "epoch": 0.6398627981399954, - "grad_norm": 2.216937303543091, - "learning_rate": 3.933562003100008e-05, - "loss": 0.8453, - "step": 72380 - }, - { - "epoch": 0.6399512014003076, - "grad_norm": 4.528128147125244, - "learning_rate": 3.9334146643328204e-05, - "loss": 0.6589, - "step": 72390 - }, - { - "epoch": 0.6400396046606199, - "grad_norm": 3.941056728363037, - "learning_rate": 3.933267325565634e-05, - "loss": 0.6726, - "step": 72400 - }, - { - "epoch": 0.6401280079209322, - "grad_norm": 2.947174310684204, - "learning_rate": 3.933119986798446e-05, - "loss": 0.5928, - "step": 72410 - }, - { - "epoch": 0.6402164111812444, - "grad_norm": 6.511383533477783, - "learning_rate": 3.9329726480312596e-05, - "loss": 0.6863, - "step": 72420 - }, - { - "epoch": 0.6403048144415566, - "grad_norm": 6.1176066398620605, - "learning_rate": 3.9328253092640724e-05, - "loss": 0.7101, - "step": 72430 - }, - { - "epoch": 0.6403932177018689, - "grad_norm": 1.474511742591858, - "learning_rate": 3.932677970496885e-05, - "loss": 0.6886, - "step": 72440 - }, - { - "epoch": 0.6404816209621811, - "grad_norm": 3.9815256595611572, - "learning_rate": 3.932530631729698e-05, - "loss": 0.6555, - "step": 72450 - }, - { - "epoch": 0.6405700242224933, - "grad_norm": 2.5388100147247314, - "learning_rate": 3.9323832929625116e-05, - "loss": 0.6699, - "step": 72460 - }, - { - "epoch": 0.6406584274828055, - "grad_norm": 2.893710136413574, - "learning_rate": 3.932235954195324e-05, - "loss": 0.7019, - "step": 72470 - }, - { - "epoch": 0.6407468307431178, - "grad_norm": 6.327343940734863, - "learning_rate": 3.932088615428137e-05, - "loss": 0.7938, - "step": 72480 - }, - { - "epoch": 0.64083523400343, - "grad_norm": 3.7004594802856445, - "learning_rate": 3.93194127666095e-05, - "loss": 0.7295, - "step": 72490 - }, - { - "epoch": 0.6409236372637422, - "grad_norm": 4.380626201629639, - "learning_rate": 3.931793937893763e-05, - "loss": 0.6347, - "step": 72500 - }, - { - "epoch": 0.6410120405240545, - "grad_norm": 3.8228535652160645, - "learning_rate": 3.931646599126576e-05, - "loss": 0.6666, - "step": 72510 - }, - { - "epoch": 0.6411004437843668, - "grad_norm": 4.612709999084473, - "learning_rate": 3.9314992603593886e-05, - "loss": 0.7021, - "step": 72520 - }, - { - "epoch": 0.641188847044679, - "grad_norm": 10.086278915405273, - "learning_rate": 3.9313519215922015e-05, - "loss": 0.6832, - "step": 72530 - }, - { - "epoch": 0.6412772503049913, - "grad_norm": 4.685321807861328, - "learning_rate": 3.931204582825015e-05, - "loss": 0.7379, - "step": 72540 - }, - { - "epoch": 0.6413656535653035, - "grad_norm": 2.3907065391540527, - "learning_rate": 3.931057244057828e-05, - "loss": 0.6257, - "step": 72550 - }, - { - "epoch": 0.6414540568256157, - "grad_norm": 2.8433804512023926, - "learning_rate": 3.9309099052906406e-05, - "loss": 0.6606, - "step": 72560 - }, - { - "epoch": 0.641542460085928, - "grad_norm": 2.0151984691619873, - "learning_rate": 3.9307625665234535e-05, - "loss": 0.5543, - "step": 72570 - }, - { - "epoch": 0.6416308633462402, - "grad_norm": 2.5229897499084473, - "learning_rate": 3.930615227756266e-05, - "loss": 0.7262, - "step": 72580 - }, - { - "epoch": 0.6417192666065524, - "grad_norm": 3.7997162342071533, - "learning_rate": 3.930467888989079e-05, - "loss": 0.6412, - "step": 72590 - }, - { - "epoch": 0.6418076698668647, - "grad_norm": 6.855039119720459, - "learning_rate": 3.9303205502218927e-05, - "loss": 0.603, - "step": 72600 - }, - { - "epoch": 0.6418960731271769, - "grad_norm": 3.914764165878296, - "learning_rate": 3.9301732114547055e-05, - "loss": 0.7359, - "step": 72610 - }, - { - "epoch": 0.6419844763874891, - "grad_norm": 1.7383325099945068, - "learning_rate": 3.930025872687518e-05, - "loss": 0.6613, - "step": 72620 - }, - { - "epoch": 0.6420728796478015, - "grad_norm": 4.043971061706543, - "learning_rate": 3.929878533920331e-05, - "loss": 0.6749, - "step": 72630 - }, - { - "epoch": 0.6421612829081137, - "grad_norm": 1.9985517263412476, - "learning_rate": 3.929731195153144e-05, - "loss": 0.5844, - "step": 72640 - }, - { - "epoch": 0.6422496861684259, - "grad_norm": 3.319026470184326, - "learning_rate": 3.929583856385957e-05, - "loss": 0.6248, - "step": 72650 - }, - { - "epoch": 0.6423380894287382, - "grad_norm": 4.223296165466309, - "learning_rate": 3.92943651761877e-05, - "loss": 0.6825, - "step": 72660 - }, - { - "epoch": 0.6424264926890504, - "grad_norm": 1.7072672843933105, - "learning_rate": 3.929289178851583e-05, - "loss": 0.6081, - "step": 72670 - }, - { - "epoch": 0.6425148959493626, - "grad_norm": 4.584011077880859, - "learning_rate": 3.929141840084396e-05, - "loss": 0.6605, - "step": 72680 - }, - { - "epoch": 0.6426032992096748, - "grad_norm": 3.3608193397521973, - "learning_rate": 3.928994501317209e-05, - "loss": 0.6693, - "step": 72690 - }, - { - "epoch": 0.6426917024699871, - "grad_norm": 2.45196270942688, - "learning_rate": 3.928847162550022e-05, - "loss": 0.7753, - "step": 72700 - }, - { - "epoch": 0.6427801057302993, - "grad_norm": 14.06520938873291, - "learning_rate": 3.9286998237828345e-05, - "loss": 0.8139, - "step": 72710 - }, - { - "epoch": 0.6428685089906115, - "grad_norm": 3.0075533390045166, - "learning_rate": 3.9285524850156474e-05, - "loss": 0.5903, - "step": 72720 - }, - { - "epoch": 0.6429569122509238, - "grad_norm": 3.4011175632476807, - "learning_rate": 3.928405146248461e-05, - "loss": 0.47, - "step": 72730 - }, - { - "epoch": 0.643045315511236, - "grad_norm": 7.843263149261475, - "learning_rate": 3.928257807481274e-05, - "loss": 0.7056, - "step": 72740 - }, - { - "epoch": 0.6431337187715483, - "grad_norm": 7.2828850746154785, - "learning_rate": 3.9281104687140865e-05, - "loss": 0.789, - "step": 72750 - }, - { - "epoch": 0.6432221220318606, - "grad_norm": 5.0123772621154785, - "learning_rate": 3.9279631299468994e-05, - "loss": 0.6655, - "step": 72760 - }, - { - "epoch": 0.6433105252921728, - "grad_norm": 7.071907043457031, - "learning_rate": 3.927815791179712e-05, - "loss": 0.6601, - "step": 72770 - }, - { - "epoch": 0.643398928552485, - "grad_norm": 2.356963634490967, - "learning_rate": 3.927668452412525e-05, - "loss": 0.7259, - "step": 72780 - }, - { - "epoch": 0.6434873318127973, - "grad_norm": 6.7405266761779785, - "learning_rate": 3.9275211136453386e-05, - "loss": 0.7829, - "step": 72790 - }, - { - "epoch": 0.6435757350731095, - "grad_norm": 1.91306734085083, - "learning_rate": 3.927373774878151e-05, - "loss": 0.7477, - "step": 72800 - }, - { - "epoch": 0.6436641383334217, - "grad_norm": 5.131834030151367, - "learning_rate": 3.927226436110964e-05, - "loss": 0.6657, - "step": 72810 - }, - { - "epoch": 0.643752541593734, - "grad_norm": 13.162694931030273, - "learning_rate": 3.927079097343777e-05, - "loss": 0.5233, - "step": 72820 - }, - { - "epoch": 0.6438409448540462, - "grad_norm": 3.790311098098755, - "learning_rate": 3.92693175857659e-05, - "loss": 0.6352, - "step": 72830 - }, - { - "epoch": 0.6439293481143584, - "grad_norm": 5.361180782318115, - "learning_rate": 3.926784419809403e-05, - "loss": 0.7202, - "step": 72840 - }, - { - "epoch": 0.6440177513746707, - "grad_norm": 2.460918426513672, - "learning_rate": 3.926637081042216e-05, - "loss": 0.612, - "step": 72850 - }, - { - "epoch": 0.6441061546349829, - "grad_norm": 7.951444149017334, - "learning_rate": 3.9264897422750284e-05, - "loss": 0.6913, - "step": 72860 - }, - { - "epoch": 0.6441945578952952, - "grad_norm": 1.660313367843628, - "learning_rate": 3.926342403507842e-05, - "loss": 0.8218, - "step": 72870 - }, - { - "epoch": 0.6442829611556075, - "grad_norm": 1.7001129388809204, - "learning_rate": 3.926195064740654e-05, - "loss": 0.7178, - "step": 72880 - }, - { - "epoch": 0.6443713644159197, - "grad_norm": 3.2389354705810547, - "learning_rate": 3.9260477259734676e-05, - "loss": 0.7323, - "step": 72890 - }, - { - "epoch": 0.6444597676762319, - "grad_norm": 2.750436544418335, - "learning_rate": 3.9259003872062804e-05, - "loss": 0.7078, - "step": 72900 - }, - { - "epoch": 0.6445481709365442, - "grad_norm": 3.6820144653320312, - "learning_rate": 3.925753048439093e-05, - "loss": 0.6893, - "step": 72910 - }, - { - "epoch": 0.6446365741968564, - "grad_norm": 4.892619609832764, - "learning_rate": 3.925605709671906e-05, - "loss": 0.606, - "step": 72920 - }, - { - "epoch": 0.6447249774571686, - "grad_norm": 1.9708278179168701, - "learning_rate": 3.9254583709047196e-05, - "loss": 0.5998, - "step": 72930 - }, - { - "epoch": 0.6448133807174808, - "grad_norm": 1.9074394702911377, - "learning_rate": 3.925311032137532e-05, - "loss": 0.5747, - "step": 72940 - }, - { - "epoch": 0.6449017839777931, - "grad_norm": 1.587152361869812, - "learning_rate": 3.925163693370345e-05, - "loss": 0.6174, - "step": 72950 - }, - { - "epoch": 0.6449901872381053, - "grad_norm": 3.8429887294769287, - "learning_rate": 3.925016354603158e-05, - "loss": 0.7796, - "step": 72960 - }, - { - "epoch": 0.6450785904984175, - "grad_norm": 1.057215690612793, - "learning_rate": 3.924869015835971e-05, - "loss": 0.6168, - "step": 72970 - }, - { - "epoch": 0.6451669937587298, - "grad_norm": 1.4331223964691162, - "learning_rate": 3.924721677068784e-05, - "loss": 0.5519, - "step": 72980 - }, - { - "epoch": 0.6452553970190421, - "grad_norm": 5.272140026092529, - "learning_rate": 3.9245743383015966e-05, - "loss": 0.8789, - "step": 72990 - }, - { - "epoch": 0.6453438002793543, - "grad_norm": 25.634410858154297, - "learning_rate": 3.9244269995344095e-05, - "loss": 0.8032, - "step": 73000 - }, - { - "epoch": 0.6454322035396666, - "grad_norm": 1.9800435304641724, - "learning_rate": 3.924279660767223e-05, - "loss": 0.697, - "step": 73010 - }, - { - "epoch": 0.6455206067999788, - "grad_norm": 7.465404987335205, - "learning_rate": 3.924132322000035e-05, - "loss": 0.53, - "step": 73020 - }, - { - "epoch": 0.645609010060291, - "grad_norm": 6.441591262817383, - "learning_rate": 3.9239849832328486e-05, - "loss": 0.5684, - "step": 73030 - }, - { - "epoch": 0.6456974133206033, - "grad_norm": 1.0479837656021118, - "learning_rate": 3.9238376444656615e-05, - "loss": 0.7137, - "step": 73040 - }, - { - "epoch": 0.6457858165809155, - "grad_norm": 1.1749058961868286, - "learning_rate": 3.923690305698474e-05, - "loss": 0.6255, - "step": 73050 - }, - { - "epoch": 0.6458742198412277, - "grad_norm": 6.112790584564209, - "learning_rate": 3.923542966931287e-05, - "loss": 0.6626, - "step": 73060 - }, - { - "epoch": 0.64596262310154, - "grad_norm": 3.223339319229126, - "learning_rate": 3.923395628164101e-05, - "loss": 0.722, - "step": 73070 - }, - { - "epoch": 0.6460510263618522, - "grad_norm": 3.2157599925994873, - "learning_rate": 3.923248289396913e-05, - "loss": 0.7325, - "step": 73080 - }, - { - "epoch": 0.6461394296221644, - "grad_norm": 3.0029873847961426, - "learning_rate": 3.923100950629726e-05, - "loss": 0.5914, - "step": 73090 - }, - { - "epoch": 0.6462278328824768, - "grad_norm": 10.399054527282715, - "learning_rate": 3.9229536118625385e-05, - "loss": 0.748, - "step": 73100 - }, - { - "epoch": 0.646316236142789, - "grad_norm": 2.6047916412353516, - "learning_rate": 3.922806273095352e-05, - "loss": 0.6442, - "step": 73110 - }, - { - "epoch": 0.6464046394031012, - "grad_norm": 1.9666624069213867, - "learning_rate": 3.922658934328165e-05, - "loss": 0.6171, - "step": 73120 - }, - { - "epoch": 0.6464930426634135, - "grad_norm": 2.737152576446533, - "learning_rate": 3.922511595560978e-05, - "loss": 0.6358, - "step": 73130 - }, - { - "epoch": 0.6465814459237257, - "grad_norm": 6.9829936027526855, - "learning_rate": 3.9223642567937905e-05, - "loss": 0.6111, - "step": 73140 - }, - { - "epoch": 0.6466698491840379, - "grad_norm": 1.3174173831939697, - "learning_rate": 3.922216918026604e-05, - "loss": 0.5371, - "step": 73150 - }, - { - "epoch": 0.6467582524443501, - "grad_norm": 3.9846386909484863, - "learning_rate": 3.922069579259416e-05, - "loss": 0.7204, - "step": 73160 - }, - { - "epoch": 0.6468466557046624, - "grad_norm": 7.698777198791504, - "learning_rate": 3.92192224049223e-05, - "loss": 0.6365, - "step": 73170 - }, - { - "epoch": 0.6469350589649746, - "grad_norm": 6.6580328941345215, - "learning_rate": 3.9217749017250425e-05, - "loss": 0.6359, - "step": 73180 - }, - { - "epoch": 0.6470234622252868, - "grad_norm": 5.755401611328125, - "learning_rate": 3.9216275629578554e-05, - "loss": 0.7798, - "step": 73190 - }, - { - "epoch": 0.6471118654855991, - "grad_norm": 1.0191410779953003, - "learning_rate": 3.921480224190668e-05, - "loss": 0.5513, - "step": 73200 - }, - { - "epoch": 0.6472002687459113, - "grad_norm": 7.7609429359436035, - "learning_rate": 3.921332885423482e-05, - "loss": 0.7038, - "step": 73210 - }, - { - "epoch": 0.6472886720062236, - "grad_norm": 1.4561318159103394, - "learning_rate": 3.921185546656294e-05, - "loss": 0.6017, - "step": 73220 - }, - { - "epoch": 0.6473770752665359, - "grad_norm": 2.4102132320404053, - "learning_rate": 3.9210382078891074e-05, - "loss": 0.6685, - "step": 73230 - }, - { - "epoch": 0.6474654785268481, - "grad_norm": 2.9067699909210205, - "learning_rate": 3.9208908691219195e-05, - "loss": 0.7334, - "step": 73240 - }, - { - "epoch": 0.6475538817871603, - "grad_norm": 3.205502510070801, - "learning_rate": 3.920743530354733e-05, - "loss": 0.6654, - "step": 73250 - }, - { - "epoch": 0.6476422850474726, - "grad_norm": 4.8111982345581055, - "learning_rate": 3.920596191587546e-05, - "loss": 0.643, - "step": 73260 - }, - { - "epoch": 0.6477306883077848, - "grad_norm": 5.713198661804199, - "learning_rate": 3.920448852820359e-05, - "loss": 0.7921, - "step": 73270 - }, - { - "epoch": 0.647819091568097, - "grad_norm": 4.404626369476318, - "learning_rate": 3.9203015140531716e-05, - "loss": 0.6053, - "step": 73280 - }, - { - "epoch": 0.6479074948284093, - "grad_norm": 3.414381504058838, - "learning_rate": 3.920154175285985e-05, - "loss": 0.5812, - "step": 73290 - }, - { - "epoch": 0.6479958980887215, - "grad_norm": 1.3864072561264038, - "learning_rate": 3.920006836518797e-05, - "loss": 0.6563, - "step": 73300 - }, - { - "epoch": 0.6480843013490337, - "grad_norm": 7.526893615722656, - "learning_rate": 3.919859497751611e-05, - "loss": 0.6581, - "step": 73310 - }, - { - "epoch": 0.648172704609346, - "grad_norm": 2.8173017501831055, - "learning_rate": 3.9197121589844236e-05, - "loss": 0.6835, - "step": 73320 - }, - { - "epoch": 0.6482611078696582, - "grad_norm": 7.339843273162842, - "learning_rate": 3.9195648202172364e-05, - "loss": 0.7126, - "step": 73330 - }, - { - "epoch": 0.6483495111299705, - "grad_norm": 2.4590420722961426, - "learning_rate": 3.919417481450049e-05, - "loss": 0.6361, - "step": 73340 - }, - { - "epoch": 0.6484379143902828, - "grad_norm": 9.176513671875, - "learning_rate": 3.919270142682862e-05, - "loss": 0.7751, - "step": 73350 - }, - { - "epoch": 0.648526317650595, - "grad_norm": 2.7113230228424072, - "learning_rate": 3.919122803915675e-05, - "loss": 0.7401, - "step": 73360 - }, - { - "epoch": 0.6486147209109072, - "grad_norm": 1.8295503854751587, - "learning_rate": 3.9189754651484884e-05, - "loss": 0.6543, - "step": 73370 - }, - { - "epoch": 0.6487031241712194, - "grad_norm": 2.899273633956909, - "learning_rate": 3.9188281263813006e-05, - "loss": 0.7755, - "step": 73380 - }, - { - "epoch": 0.6487915274315317, - "grad_norm": 2.4900076389312744, - "learning_rate": 3.918680787614114e-05, - "loss": 0.636, - "step": 73390 - }, - { - "epoch": 0.6488799306918439, - "grad_norm": 17.328548431396484, - "learning_rate": 3.918533448846927e-05, - "loss": 0.6923, - "step": 73400 - }, - { - "epoch": 0.6489683339521561, - "grad_norm": 7.702594757080078, - "learning_rate": 3.91838611007974e-05, - "loss": 0.7161, - "step": 73410 - }, - { - "epoch": 0.6490567372124684, - "grad_norm": 1.5657483339309692, - "learning_rate": 3.9182387713125526e-05, - "loss": 0.7198, - "step": 73420 - }, - { - "epoch": 0.6491451404727806, - "grad_norm": 4.969159126281738, - "learning_rate": 3.918091432545366e-05, - "loss": 0.5189, - "step": 73430 - }, - { - "epoch": 0.6492335437330928, - "grad_norm": 7.732271671295166, - "learning_rate": 3.917944093778178e-05, - "loss": 0.6831, - "step": 73440 - }, - { - "epoch": 0.6493219469934051, - "grad_norm": 0.968481719493866, - "learning_rate": 3.917796755010992e-05, - "loss": 0.5961, - "step": 73450 - }, - { - "epoch": 0.6494103502537174, - "grad_norm": 2.046670913696289, - "learning_rate": 3.9176494162438046e-05, - "loss": 0.6725, - "step": 73460 - }, - { - "epoch": 0.6494987535140296, - "grad_norm": 1.2786284685134888, - "learning_rate": 3.9175020774766175e-05, - "loss": 0.7509, - "step": 73470 - }, - { - "epoch": 0.6495871567743419, - "grad_norm": 2.8690474033355713, - "learning_rate": 3.91735473870943e-05, - "loss": 0.6889, - "step": 73480 - }, - { - "epoch": 0.6496755600346541, - "grad_norm": 1.6470656394958496, - "learning_rate": 3.917207399942243e-05, - "loss": 0.6097, - "step": 73490 - }, - { - "epoch": 0.6497639632949663, - "grad_norm": 4.127285480499268, - "learning_rate": 3.917060061175056e-05, - "loss": 0.6348, - "step": 73500 - }, - { - "epoch": 0.6498523665552786, - "grad_norm": 3.5229005813598633, - "learning_rate": 3.9169127224078695e-05, - "loss": 0.6321, - "step": 73510 - }, - { - "epoch": 0.6499407698155908, - "grad_norm": 2.7361483573913574, - "learning_rate": 3.916765383640682e-05, - "loss": 0.6596, - "step": 73520 - }, - { - "epoch": 0.650029173075903, - "grad_norm": 3.1426522731781006, - "learning_rate": 3.916618044873495e-05, - "loss": 0.6338, - "step": 73530 - }, - { - "epoch": 0.6501175763362153, - "grad_norm": 2.4380404949188232, - "learning_rate": 3.916470706106308e-05, - "loss": 0.8314, - "step": 73540 - }, - { - "epoch": 0.6502059795965275, - "grad_norm": 3.5036780834198, - "learning_rate": 3.916323367339121e-05, - "loss": 0.5892, - "step": 73550 - }, - { - "epoch": 0.6502943828568397, - "grad_norm": 1.5960756540298462, - "learning_rate": 3.916176028571934e-05, - "loss": 0.4945, - "step": 73560 - }, - { - "epoch": 0.650382786117152, - "grad_norm": 7.403130054473877, - "learning_rate": 3.916028689804747e-05, - "loss": 0.7256, - "step": 73570 - }, - { - "epoch": 0.6504711893774643, - "grad_norm": 7.227440357208252, - "learning_rate": 3.91588135103756e-05, - "loss": 0.5593, - "step": 73580 - }, - { - "epoch": 0.6505595926377765, - "grad_norm": 3.7225825786590576, - "learning_rate": 3.915734012270373e-05, - "loss": 0.6105, - "step": 73590 - }, - { - "epoch": 0.6506479958980887, - "grad_norm": 2.2574832439422607, - "learning_rate": 3.915586673503186e-05, - "loss": 0.6534, - "step": 73600 - }, - { - "epoch": 0.650736399158401, - "grad_norm": 4.280375003814697, - "learning_rate": 3.9154393347359985e-05, - "loss": 0.5881, - "step": 73610 - }, - { - "epoch": 0.6508248024187132, - "grad_norm": 3.515443801879883, - "learning_rate": 3.9152919959688114e-05, - "loss": 0.7258, - "step": 73620 - }, - { - "epoch": 0.6509132056790254, - "grad_norm": 7.235195636749268, - "learning_rate": 3.915144657201624e-05, - "loss": 0.725, - "step": 73630 - }, - { - "epoch": 0.6510016089393377, - "grad_norm": 2.778562068939209, - "learning_rate": 3.914997318434438e-05, - "loss": 0.7343, - "step": 73640 - }, - { - "epoch": 0.6510900121996499, - "grad_norm": 4.067414283752441, - "learning_rate": 3.9148499796672505e-05, - "loss": 0.6327, - "step": 73650 - }, - { - "epoch": 0.6511784154599621, - "grad_norm": 1.4434269666671753, - "learning_rate": 3.9147026409000634e-05, - "loss": 0.7595, - "step": 73660 - }, - { - "epoch": 0.6512668187202744, - "grad_norm": 2.4540281295776367, - "learning_rate": 3.914555302132876e-05, - "loss": 0.5795, - "step": 73670 - }, - { - "epoch": 0.6513552219805866, - "grad_norm": 4.55037784576416, - "learning_rate": 3.914407963365689e-05, - "loss": 0.5787, - "step": 73680 - }, - { - "epoch": 0.6514436252408989, - "grad_norm": 7.68404483795166, - "learning_rate": 3.914260624598502e-05, - "loss": 0.6487, - "step": 73690 - }, - { - "epoch": 0.6515320285012112, - "grad_norm": 6.309410572052002, - "learning_rate": 3.9141132858313154e-05, - "loss": 0.676, - "step": 73700 - }, - { - "epoch": 0.6516204317615234, - "grad_norm": 4.53164529800415, - "learning_rate": 3.9139659470641276e-05, - "loss": 0.6698, - "step": 73710 - }, - { - "epoch": 0.6517088350218356, - "grad_norm": 4.580211639404297, - "learning_rate": 3.913818608296941e-05, - "loss": 0.7516, - "step": 73720 - }, - { - "epoch": 0.6517972382821479, - "grad_norm": 3.3935134410858154, - "learning_rate": 3.913671269529754e-05, - "loss": 0.6457, - "step": 73730 - }, - { - "epoch": 0.6518856415424601, - "grad_norm": 1.6694892644882202, - "learning_rate": 3.913523930762567e-05, - "loss": 0.5469, - "step": 73740 - }, - { - "epoch": 0.6519740448027723, - "grad_norm": 1.1333941221237183, - "learning_rate": 3.9133765919953796e-05, - "loss": 0.5975, - "step": 73750 - }, - { - "epoch": 0.6520624480630846, - "grad_norm": 0.8881174325942993, - "learning_rate": 3.913229253228193e-05, - "loss": 0.5939, - "step": 73760 - }, - { - "epoch": 0.6521508513233968, - "grad_norm": 3.590424060821533, - "learning_rate": 3.913081914461005e-05, - "loss": 0.6154, - "step": 73770 - }, - { - "epoch": 0.652239254583709, - "grad_norm": 3.809953451156616, - "learning_rate": 3.912934575693819e-05, - "loss": 0.7261, - "step": 73780 - }, - { - "epoch": 0.6523276578440212, - "grad_norm": 8.598770141601562, - "learning_rate": 3.9127872369266316e-05, - "loss": 0.6614, - "step": 73790 - }, - { - "epoch": 0.6524160611043335, - "grad_norm": 15.509154319763184, - "learning_rate": 3.9126398981594444e-05, - "loss": 0.6871, - "step": 73800 - }, - { - "epoch": 0.6525044643646458, - "grad_norm": 9.159502029418945, - "learning_rate": 3.912492559392257e-05, - "loss": 0.8046, - "step": 73810 - }, - { - "epoch": 0.652592867624958, - "grad_norm": 5.595328330993652, - "learning_rate": 3.91234522062507e-05, - "loss": 0.6996, - "step": 73820 - }, - { - "epoch": 0.6526812708852703, - "grad_norm": 2.1299898624420166, - "learning_rate": 3.912197881857883e-05, - "loss": 0.7598, - "step": 73830 - }, - { - "epoch": 0.6527696741455825, - "grad_norm": 2.160871744155884, - "learning_rate": 3.9120505430906964e-05, - "loss": 0.7018, - "step": 73840 - }, - { - "epoch": 0.6528580774058947, - "grad_norm": 4.677453994750977, - "learning_rate": 3.9119032043235086e-05, - "loss": 0.8033, - "step": 73850 - }, - { - "epoch": 0.652946480666207, - "grad_norm": 3.1859958171844482, - "learning_rate": 3.911755865556322e-05, - "loss": 0.7191, - "step": 73860 - }, - { - "epoch": 0.6530348839265192, - "grad_norm": 7.368778228759766, - "learning_rate": 3.911608526789135e-05, - "loss": 0.6873, - "step": 73870 - }, - { - "epoch": 0.6531232871868314, - "grad_norm": 1.461064338684082, - "learning_rate": 3.911461188021948e-05, - "loss": 0.4827, - "step": 73880 - }, - { - "epoch": 0.6532116904471437, - "grad_norm": 1.586313247680664, - "learning_rate": 3.9113138492547606e-05, - "loss": 0.6818, - "step": 73890 - }, - { - "epoch": 0.6533000937074559, - "grad_norm": 5.235528469085693, - "learning_rate": 3.911166510487574e-05, - "loss": 0.6646, - "step": 73900 - }, - { - "epoch": 0.6533884969677681, - "grad_norm": 4.75295352935791, - "learning_rate": 3.911019171720386e-05, - "loss": 0.7481, - "step": 73910 - }, - { - "epoch": 0.6534769002280804, - "grad_norm": 8.453245162963867, - "learning_rate": 3.9108718329532e-05, - "loss": 0.5429, - "step": 73920 - }, - { - "epoch": 0.6535653034883927, - "grad_norm": 6.221120834350586, - "learning_rate": 3.910724494186012e-05, - "loss": 0.6538, - "step": 73930 - }, - { - "epoch": 0.6536537067487049, - "grad_norm": 3.9913480281829834, - "learning_rate": 3.9105771554188255e-05, - "loss": 0.6814, - "step": 73940 - }, - { - "epoch": 0.6537421100090172, - "grad_norm": 2.242767095565796, - "learning_rate": 3.910429816651638e-05, - "loss": 0.6905, - "step": 73950 - }, - { - "epoch": 0.6538305132693294, - "grad_norm": 4.462658882141113, - "learning_rate": 3.910282477884451e-05, - "loss": 0.6583, - "step": 73960 - }, - { - "epoch": 0.6539189165296416, - "grad_norm": 4.621025085449219, - "learning_rate": 3.910135139117264e-05, - "loss": 0.5677, - "step": 73970 - }, - { - "epoch": 0.6540073197899539, - "grad_norm": 1.43351411819458, - "learning_rate": 3.9099878003500775e-05, - "loss": 0.714, - "step": 73980 - }, - { - "epoch": 0.6540957230502661, - "grad_norm": 6.703693866729736, - "learning_rate": 3.9098404615828897e-05, - "loss": 0.6055, - "step": 73990 - }, - { - "epoch": 0.6541841263105783, - "grad_norm": 2.37622332572937, - "learning_rate": 3.909693122815703e-05, - "loss": 0.5538, - "step": 74000 - }, - { - "epoch": 0.6542725295708905, - "grad_norm": 5.703482627868652, - "learning_rate": 3.909545784048516e-05, - "loss": 0.6441, - "step": 74010 - }, - { - "epoch": 0.6543609328312028, - "grad_norm": 2.2979772090911865, - "learning_rate": 3.909398445281329e-05, - "loss": 0.6144, - "step": 74020 - }, - { - "epoch": 0.654449336091515, - "grad_norm": 1.548211693763733, - "learning_rate": 3.909251106514142e-05, - "loss": 0.7007, - "step": 74030 - }, - { - "epoch": 0.6545377393518272, - "grad_norm": 1.5616092681884766, - "learning_rate": 3.909103767746955e-05, - "loss": 0.7027, - "step": 74040 - }, - { - "epoch": 0.6546261426121396, - "grad_norm": 5.873095989227295, - "learning_rate": 3.9089564289797673e-05, - "loss": 0.5587, - "step": 74050 - }, - { - "epoch": 0.6547145458724518, - "grad_norm": 6.141514301300049, - "learning_rate": 3.908809090212581e-05, - "loss": 0.6174, - "step": 74060 - }, - { - "epoch": 0.654802949132764, - "grad_norm": 3.8194026947021484, - "learning_rate": 3.908661751445393e-05, - "loss": 0.6711, - "step": 74070 - }, - { - "epoch": 0.6548913523930763, - "grad_norm": 4.030489444732666, - "learning_rate": 3.9085144126782065e-05, - "loss": 0.5805, - "step": 74080 - }, - { - "epoch": 0.6549797556533885, - "grad_norm": 5.708727836608887, - "learning_rate": 3.9083670739110194e-05, - "loss": 0.7565, - "step": 74090 - }, - { - "epoch": 0.6550681589137007, - "grad_norm": 3.284543037414551, - "learning_rate": 3.908219735143832e-05, - "loss": 0.637, - "step": 74100 - }, - { - "epoch": 0.655156562174013, - "grad_norm": 13.578351974487305, - "learning_rate": 3.908072396376645e-05, - "loss": 0.7715, - "step": 74110 - }, - { - "epoch": 0.6552449654343252, - "grad_norm": 3.3261005878448486, - "learning_rate": 3.9079250576094585e-05, - "loss": 0.7305, - "step": 74120 - }, - { - "epoch": 0.6553333686946374, - "grad_norm": 5.2780022621154785, - "learning_rate": 3.907777718842271e-05, - "loss": 0.7668, - "step": 74130 - }, - { - "epoch": 0.6554217719549497, - "grad_norm": 1.6371046304702759, - "learning_rate": 3.907630380075084e-05, - "loss": 0.6188, - "step": 74140 - }, - { - "epoch": 0.6555101752152619, - "grad_norm": 7.224486351013184, - "learning_rate": 3.907483041307897e-05, - "loss": 0.5678, - "step": 74150 - }, - { - "epoch": 0.6555985784755742, - "grad_norm": 2.072559356689453, - "learning_rate": 3.90733570254071e-05, - "loss": 0.6709, - "step": 74160 - }, - { - "epoch": 0.6556869817358865, - "grad_norm": 7.273366928100586, - "learning_rate": 3.907188363773523e-05, - "loss": 0.5146, - "step": 74170 - }, - { - "epoch": 0.6557753849961987, - "grad_norm": 3.7417361736297607, - "learning_rate": 3.9070410250063356e-05, - "loss": 0.5478, - "step": 74180 - }, - { - "epoch": 0.6558637882565109, - "grad_norm": 3.2216029167175293, - "learning_rate": 3.9068936862391484e-05, - "loss": 0.5563, - "step": 74190 - }, - { - "epoch": 0.6559521915168232, - "grad_norm": 6.048964977264404, - "learning_rate": 3.906746347471962e-05, - "loss": 0.6505, - "step": 74200 - }, - { - "epoch": 0.6560405947771354, - "grad_norm": 1.0378650426864624, - "learning_rate": 3.906599008704774e-05, - "loss": 0.6915, - "step": 74210 - }, - { - "epoch": 0.6561289980374476, - "grad_norm": 2.546952247619629, - "learning_rate": 3.9064516699375876e-05, - "loss": 0.6816, - "step": 74220 - }, - { - "epoch": 0.6562174012977599, - "grad_norm": 1.9041805267333984, - "learning_rate": 3.9063043311704004e-05, - "loss": 0.708, - "step": 74230 - }, - { - "epoch": 0.6563058045580721, - "grad_norm": 3.479835033416748, - "learning_rate": 3.906156992403213e-05, - "loss": 0.6829, - "step": 74240 - }, - { - "epoch": 0.6563942078183843, - "grad_norm": 3.2493185997009277, - "learning_rate": 3.906009653636026e-05, - "loss": 0.7421, - "step": 74250 - }, - { - "epoch": 0.6564826110786965, - "grad_norm": 5.673702716827393, - "learning_rate": 3.9058623148688396e-05, - "loss": 0.5329, - "step": 74260 - }, - { - "epoch": 0.6565710143390088, - "grad_norm": 2.299133539199829, - "learning_rate": 3.905714976101652e-05, - "loss": 0.648, - "step": 74270 - }, - { - "epoch": 0.6566594175993211, - "grad_norm": 4.86217737197876, - "learning_rate": 3.905567637334465e-05, - "loss": 0.8056, - "step": 74280 - }, - { - "epoch": 0.6567478208596333, - "grad_norm": 1.1120375394821167, - "learning_rate": 3.9054202985672774e-05, - "loss": 0.7625, - "step": 74290 - }, - { - "epoch": 0.6568362241199456, - "grad_norm": 5.428472518920898, - "learning_rate": 3.905272959800091e-05, - "loss": 0.5938, - "step": 74300 - }, - { - "epoch": 0.6569246273802578, - "grad_norm": 3.106133222579956, - "learning_rate": 3.905125621032904e-05, - "loss": 0.5564, - "step": 74310 - }, - { - "epoch": 0.65701303064057, - "grad_norm": 3.6658782958984375, - "learning_rate": 3.9049782822657166e-05, - "loss": 0.781, - "step": 74320 - }, - { - "epoch": 0.6571014339008823, - "grad_norm": 4.764915943145752, - "learning_rate": 3.9048309434985294e-05, - "loss": 0.5713, - "step": 74330 - }, - { - "epoch": 0.6571898371611945, - "grad_norm": 1.1686493158340454, - "learning_rate": 3.904683604731343e-05, - "loss": 0.6478, - "step": 74340 - }, - { - "epoch": 0.6572782404215067, - "grad_norm": 8.3425931930542, - "learning_rate": 3.904536265964155e-05, - "loss": 0.8412, - "step": 74350 - }, - { - "epoch": 0.657366643681819, - "grad_norm": 6.874490261077881, - "learning_rate": 3.9043889271969686e-05, - "loss": 0.6412, - "step": 74360 - }, - { - "epoch": 0.6574550469421312, - "grad_norm": 5.611931324005127, - "learning_rate": 3.9042415884297815e-05, - "loss": 0.7604, - "step": 74370 - }, - { - "epoch": 0.6575434502024434, - "grad_norm": 2.853137969970703, - "learning_rate": 3.904094249662594e-05, - "loss": 0.5872, - "step": 74380 - }, - { - "epoch": 0.6576318534627557, - "grad_norm": 3.651102304458618, - "learning_rate": 3.903946910895407e-05, - "loss": 0.6484, - "step": 74390 - }, - { - "epoch": 0.657720256723068, - "grad_norm": 2.713616132736206, - "learning_rate": 3.90379957212822e-05, - "loss": 0.5957, - "step": 74400 - }, - { - "epoch": 0.6578086599833802, - "grad_norm": 2.728940963745117, - "learning_rate": 3.903652233361033e-05, - "loss": 0.7185, - "step": 74410 - }, - { - "epoch": 0.6578970632436925, - "grad_norm": 1.8137192726135254, - "learning_rate": 3.903504894593846e-05, - "loss": 0.6176, - "step": 74420 - }, - { - "epoch": 0.6579854665040047, - "grad_norm": 5.03383731842041, - "learning_rate": 3.903357555826659e-05, - "loss": 0.6772, - "step": 74430 - }, - { - "epoch": 0.6580738697643169, - "grad_norm": 4.3429999351501465, - "learning_rate": 3.903210217059472e-05, - "loss": 0.6096, - "step": 74440 - }, - { - "epoch": 0.6581622730246292, - "grad_norm": 2.540360927581787, - "learning_rate": 3.903062878292285e-05, - "loss": 0.7183, - "step": 74450 - }, - { - "epoch": 0.6582506762849414, - "grad_norm": 16.79432487487793, - "learning_rate": 3.9029155395250977e-05, - "loss": 0.6725, - "step": 74460 - }, - { - "epoch": 0.6583390795452536, - "grad_norm": 1.5936205387115479, - "learning_rate": 3.9027682007579105e-05, - "loss": 0.7244, - "step": 74470 - }, - { - "epoch": 0.6584274828055658, - "grad_norm": 3.5982398986816406, - "learning_rate": 3.902620861990724e-05, - "loss": 0.6549, - "step": 74480 - }, - { - "epoch": 0.6585158860658781, - "grad_norm": 2.0425920486450195, - "learning_rate": 3.902473523223537e-05, - "loss": 0.6661, - "step": 74490 - }, - { - "epoch": 0.6586042893261903, - "grad_norm": 3.9640567302703857, - "learning_rate": 3.90232618445635e-05, - "loss": 0.6676, - "step": 74500 - }, - { - "epoch": 0.6586926925865025, - "grad_norm": 3.5814859867095947, - "learning_rate": 3.9021788456891625e-05, - "loss": 0.5836, - "step": 74510 - }, - { - "epoch": 0.6587810958468149, - "grad_norm": 5.417182922363281, - "learning_rate": 3.9020315069219753e-05, - "loss": 0.5923, - "step": 74520 - }, - { - "epoch": 0.6588694991071271, - "grad_norm": 5.457121849060059, - "learning_rate": 3.901884168154788e-05, - "loss": 0.6721, - "step": 74530 - }, - { - "epoch": 0.6589579023674393, - "grad_norm": 3.9403765201568604, - "learning_rate": 3.901736829387601e-05, - "loss": 0.7197, - "step": 74540 - }, - { - "epoch": 0.6590463056277516, - "grad_norm": 3.137930154800415, - "learning_rate": 3.9015894906204145e-05, - "loss": 0.7591, - "step": 74550 - }, - { - "epoch": 0.6591347088880638, - "grad_norm": 4.644708156585693, - "learning_rate": 3.9014421518532274e-05, - "loss": 0.6787, - "step": 74560 - }, - { - "epoch": 0.659223112148376, - "grad_norm": 1.615928292274475, - "learning_rate": 3.90129481308604e-05, - "loss": 0.6022, - "step": 74570 - }, - { - "epoch": 0.6593115154086883, - "grad_norm": 3.498143434524536, - "learning_rate": 3.901147474318853e-05, - "loss": 0.7551, - "step": 74580 - }, - { - "epoch": 0.6593999186690005, - "grad_norm": 1.796372652053833, - "learning_rate": 3.901000135551666e-05, - "loss": 0.7387, - "step": 74590 - }, - { - "epoch": 0.6594883219293127, - "grad_norm": 6.777951240539551, - "learning_rate": 3.900852796784479e-05, - "loss": 0.7005, - "step": 74600 - }, - { - "epoch": 0.659576725189625, - "grad_norm": 8.958002090454102, - "learning_rate": 3.900705458017292e-05, - "loss": 0.7769, - "step": 74610 - }, - { - "epoch": 0.6596651284499372, - "grad_norm": 1.4068703651428223, - "learning_rate": 3.900558119250105e-05, - "loss": 0.7339, - "step": 74620 - }, - { - "epoch": 0.6597535317102494, - "grad_norm": 7.773104190826416, - "learning_rate": 3.900410780482918e-05, - "loss": 0.6199, - "step": 74630 - }, - { - "epoch": 0.6598419349705618, - "grad_norm": 2.2941694259643555, - "learning_rate": 3.900263441715731e-05, - "loss": 0.7006, - "step": 74640 - }, - { - "epoch": 0.659930338230874, - "grad_norm": 1.6014699935913086, - "learning_rate": 3.9001161029485436e-05, - "loss": 0.6841, - "step": 74650 - }, - { - "epoch": 0.6600187414911862, - "grad_norm": 2.185286521911621, - "learning_rate": 3.8999687641813564e-05, - "loss": 0.6551, - "step": 74660 - }, - { - "epoch": 0.6601071447514985, - "grad_norm": 7.959512710571289, - "learning_rate": 3.89982142541417e-05, - "loss": 0.6432, - "step": 74670 - }, - { - "epoch": 0.6601955480118107, - "grad_norm": 4.725872993469238, - "learning_rate": 3.899674086646982e-05, - "loss": 0.6286, - "step": 74680 - }, - { - "epoch": 0.6602839512721229, - "grad_norm": 7.8982415199279785, - "learning_rate": 3.8995267478797956e-05, - "loss": 0.6208, - "step": 74690 - }, - { - "epoch": 0.6603723545324351, - "grad_norm": 2.8995907306671143, - "learning_rate": 3.8993794091126084e-05, - "loss": 0.6577, - "step": 74700 - }, - { - "epoch": 0.6604607577927474, - "grad_norm": 3.0823237895965576, - "learning_rate": 3.899232070345421e-05, - "loss": 0.6799, - "step": 74710 - }, - { - "epoch": 0.6605491610530596, - "grad_norm": 7.6601176261901855, - "learning_rate": 3.899084731578234e-05, - "loss": 0.7882, - "step": 74720 - }, - { - "epoch": 0.6606375643133718, - "grad_norm": 4.167365074157715, - "learning_rate": 3.8989373928110476e-05, - "loss": 0.6947, - "step": 74730 - }, - { - "epoch": 0.6607259675736841, - "grad_norm": 3.089801073074341, - "learning_rate": 3.89879005404386e-05, - "loss": 0.6315, - "step": 74740 - }, - { - "epoch": 0.6608143708339964, - "grad_norm": 4.085617542266846, - "learning_rate": 3.898642715276673e-05, - "loss": 0.673, - "step": 74750 - }, - { - "epoch": 0.6609027740943086, - "grad_norm": 3.7830417156219482, - "learning_rate": 3.8984953765094854e-05, - "loss": 0.6371, - "step": 74760 - }, - { - "epoch": 0.6609911773546209, - "grad_norm": 2.098037004470825, - "learning_rate": 3.898348037742299e-05, - "loss": 0.7509, - "step": 74770 - }, - { - "epoch": 0.6610795806149331, - "grad_norm": 4.651118755340576, - "learning_rate": 3.898200698975112e-05, - "loss": 0.7642, - "step": 74780 - }, - { - "epoch": 0.6611679838752453, - "grad_norm": 3.37223482131958, - "learning_rate": 3.8980533602079246e-05, - "loss": 0.6491, - "step": 74790 - }, - { - "epoch": 0.6612563871355576, - "grad_norm": 9.367709159851074, - "learning_rate": 3.8979060214407375e-05, - "loss": 0.7687, - "step": 74800 - }, - { - "epoch": 0.6613447903958698, - "grad_norm": 8.562145233154297, - "learning_rate": 3.897758682673551e-05, - "loss": 0.7634, - "step": 74810 - }, - { - "epoch": 0.661433193656182, - "grad_norm": 3.6404943466186523, - "learning_rate": 3.897611343906363e-05, - "loss": 0.7594, - "step": 74820 - }, - { - "epoch": 0.6615215969164943, - "grad_norm": 1.6759766340255737, - "learning_rate": 3.8974640051391766e-05, - "loss": 0.6646, - "step": 74830 - }, - { - "epoch": 0.6616100001768065, - "grad_norm": 7.769847393035889, - "learning_rate": 3.8973166663719895e-05, - "loss": 0.6609, - "step": 74840 - }, - { - "epoch": 0.6616984034371187, - "grad_norm": 4.607982635498047, - "learning_rate": 3.897169327604802e-05, - "loss": 0.7208, - "step": 74850 - }, - { - "epoch": 0.661786806697431, - "grad_norm": 5.989676475524902, - "learning_rate": 3.897021988837615e-05, - "loss": 0.6603, - "step": 74860 - }, - { - "epoch": 0.6618752099577433, - "grad_norm": 7.331932067871094, - "learning_rate": 3.896874650070428e-05, - "loss": 0.8526, - "step": 74870 - }, - { - "epoch": 0.6619636132180555, - "grad_norm": 3.6298041343688965, - "learning_rate": 3.896727311303241e-05, - "loss": 0.7313, - "step": 74880 - }, - { - "epoch": 0.6620520164783678, - "grad_norm": 5.4664692878723145, - "learning_rate": 3.896579972536054e-05, - "loss": 0.6411, - "step": 74890 - }, - { - "epoch": 0.66214041973868, - "grad_norm": 5.398388385772705, - "learning_rate": 3.8964326337688665e-05, - "loss": 0.6359, - "step": 74900 - }, - { - "epoch": 0.6622288229989922, - "grad_norm": 5.649899482727051, - "learning_rate": 3.89628529500168e-05, - "loss": 0.6118, - "step": 74910 - }, - { - "epoch": 0.6623172262593044, - "grad_norm": 4.014989852905273, - "learning_rate": 3.896137956234493e-05, - "loss": 0.6763, - "step": 74920 - }, - { - "epoch": 0.6624056295196167, - "grad_norm": 2.751934766769409, - "learning_rate": 3.895990617467306e-05, - "loss": 0.6632, - "step": 74930 - }, - { - "epoch": 0.6624940327799289, - "grad_norm": 3.3606157302856445, - "learning_rate": 3.8958432787001185e-05, - "loss": 0.7213, - "step": 74940 - }, - { - "epoch": 0.6625824360402411, - "grad_norm": 4.998013973236084, - "learning_rate": 3.895695939932932e-05, - "loss": 0.863, - "step": 74950 - }, - { - "epoch": 0.6626708393005534, - "grad_norm": 3.9802145957946777, - "learning_rate": 3.895548601165744e-05, - "loss": 0.6845, - "step": 74960 - }, - { - "epoch": 0.6627592425608656, - "grad_norm": 5.25894832611084, - "learning_rate": 3.895401262398558e-05, - "loss": 0.7301, - "step": 74970 - }, - { - "epoch": 0.6628476458211778, - "grad_norm": 1.4403324127197266, - "learning_rate": 3.8952539236313705e-05, - "loss": 0.7469, - "step": 74980 - }, - { - "epoch": 0.6629360490814902, - "grad_norm": 2.2025909423828125, - "learning_rate": 3.8951065848641834e-05, - "loss": 0.595, - "step": 74990 - }, - { - "epoch": 0.6630244523418024, - "grad_norm": 1.9643757343292236, - "learning_rate": 3.894959246096996e-05, - "loss": 0.639, - "step": 75000 - }, - { - "epoch": 0.6631128556021146, - "grad_norm": 6.522768020629883, - "learning_rate": 3.894811907329809e-05, - "loss": 0.6624, - "step": 75010 - }, - { - "epoch": 0.6632012588624269, - "grad_norm": 1.6482665538787842, - "learning_rate": 3.894664568562622e-05, - "loss": 0.6221, - "step": 75020 - }, - { - "epoch": 0.6632896621227391, - "grad_norm": 6.372900009155273, - "learning_rate": 3.8945172297954354e-05, - "loss": 0.6881, - "step": 75030 - }, - { - "epoch": 0.6633780653830513, - "grad_norm": 3.927379608154297, - "learning_rate": 3.8943698910282475e-05, - "loss": 0.6885, - "step": 75040 - }, - { - "epoch": 0.6634664686433636, - "grad_norm": 17.94007682800293, - "learning_rate": 3.894222552261061e-05, - "loss": 0.6843, - "step": 75050 - }, - { - "epoch": 0.6635548719036758, - "grad_norm": 3.019455909729004, - "learning_rate": 3.894075213493874e-05, - "loss": 0.8003, - "step": 75060 - }, - { - "epoch": 0.663643275163988, - "grad_norm": 6.074705600738525, - "learning_rate": 3.893927874726687e-05, - "loss": 0.6351, - "step": 75070 - }, - { - "epoch": 0.6637316784243003, - "grad_norm": 2.8690683841705322, - "learning_rate": 3.8937805359594996e-05, - "loss": 0.5528, - "step": 75080 - }, - { - "epoch": 0.6638200816846125, - "grad_norm": 3.4551234245300293, - "learning_rate": 3.893633197192313e-05, - "loss": 0.7532, - "step": 75090 - }, - { - "epoch": 0.6639084849449247, - "grad_norm": 3.0580084323883057, - "learning_rate": 3.893485858425125e-05, - "loss": 0.6613, - "step": 75100 - }, - { - "epoch": 0.6639968882052371, - "grad_norm": 6.894099712371826, - "learning_rate": 3.893338519657939e-05, - "loss": 0.6883, - "step": 75110 - }, - { - "epoch": 0.6640852914655493, - "grad_norm": 4.7544732093811035, - "learning_rate": 3.893191180890751e-05, - "loss": 0.6124, - "step": 75120 - }, - { - "epoch": 0.6641736947258615, - "grad_norm": 1.909325122833252, - "learning_rate": 3.8930438421235644e-05, - "loss": 0.8038, - "step": 75130 - }, - { - "epoch": 0.6642620979861737, - "grad_norm": 5.189207077026367, - "learning_rate": 3.892896503356377e-05, - "loss": 0.5643, - "step": 75140 - }, - { - "epoch": 0.664350501246486, - "grad_norm": 4.618427753448486, - "learning_rate": 3.89274916458919e-05, - "loss": 0.7983, - "step": 75150 - }, - { - "epoch": 0.6644389045067982, - "grad_norm": 1.8668888807296753, - "learning_rate": 3.892601825822003e-05, - "loss": 0.6527, - "step": 75160 - }, - { - "epoch": 0.6645273077671104, - "grad_norm": 2.950101613998413, - "learning_rate": 3.8924544870548164e-05, - "loss": 0.6145, - "step": 75170 - }, - { - "epoch": 0.6646157110274227, - "grad_norm": 9.534425735473633, - "learning_rate": 3.8923071482876286e-05, - "loss": 0.7393, - "step": 75180 - }, - { - "epoch": 0.6647041142877349, - "grad_norm": 1.30713951587677, - "learning_rate": 3.892159809520442e-05, - "loss": 0.627, - "step": 75190 - }, - { - "epoch": 0.6647925175480471, - "grad_norm": 2.4795477390289307, - "learning_rate": 3.892012470753255e-05, - "loss": 0.592, - "step": 75200 - }, - { - "epoch": 0.6648809208083594, - "grad_norm": 3.4508309364318848, - "learning_rate": 3.891865131986068e-05, - "loss": 0.7174, - "step": 75210 - }, - { - "epoch": 0.6649693240686717, - "grad_norm": 9.213872909545898, - "learning_rate": 3.8917177932188806e-05, - "loss": 0.6935, - "step": 75220 - }, - { - "epoch": 0.6650577273289839, - "grad_norm": 4.244976043701172, - "learning_rate": 3.8915704544516934e-05, - "loss": 0.8166, - "step": 75230 - }, - { - "epoch": 0.6651461305892962, - "grad_norm": 6.809774875640869, - "learning_rate": 3.891423115684506e-05, - "loss": 0.7052, - "step": 75240 - }, - { - "epoch": 0.6652345338496084, - "grad_norm": 1.0831025838851929, - "learning_rate": 3.89127577691732e-05, - "loss": 0.6503, - "step": 75250 - }, - { - "epoch": 0.6653229371099206, - "grad_norm": 5.4457902908325195, - "learning_rate": 3.891128438150132e-05, - "loss": 0.8292, - "step": 75260 - }, - { - "epoch": 0.6654113403702329, - "grad_norm": 14.615921974182129, - "learning_rate": 3.8909810993829455e-05, - "loss": 0.5816, - "step": 75270 - }, - { - "epoch": 0.6654997436305451, - "grad_norm": 6.300139904022217, - "learning_rate": 3.890833760615758e-05, - "loss": 0.7312, - "step": 75280 - }, - { - "epoch": 0.6655881468908573, - "grad_norm": 1.539911150932312, - "learning_rate": 3.890686421848571e-05, - "loss": 0.6257, - "step": 75290 - }, - { - "epoch": 0.6656765501511696, - "grad_norm": 2.3931667804718018, - "learning_rate": 3.890539083081384e-05, - "loss": 0.6081, - "step": 75300 - }, - { - "epoch": 0.6657649534114818, - "grad_norm": 1.514052391052246, - "learning_rate": 3.8903917443141975e-05, - "loss": 0.6673, - "step": 75310 - }, - { - "epoch": 0.665853356671794, - "grad_norm": 1.3385413885116577, - "learning_rate": 3.89024440554701e-05, - "loss": 0.5597, - "step": 75320 - }, - { - "epoch": 0.6659417599321062, - "grad_norm": 2.084204912185669, - "learning_rate": 3.890097066779823e-05, - "loss": 0.5735, - "step": 75330 - }, - { - "epoch": 0.6660301631924186, - "grad_norm": 2.842465877532959, - "learning_rate": 3.889949728012636e-05, - "loss": 0.636, - "step": 75340 - }, - { - "epoch": 0.6661185664527308, - "grad_norm": 7.057918071746826, - "learning_rate": 3.889802389245449e-05, - "loss": 0.6449, - "step": 75350 - }, - { - "epoch": 0.666206969713043, - "grad_norm": 0.6593702435493469, - "learning_rate": 3.8896550504782617e-05, - "loss": 0.7469, - "step": 75360 - }, - { - "epoch": 0.6662953729733553, - "grad_norm": 5.400606632232666, - "learning_rate": 3.8895077117110745e-05, - "loss": 0.6287, - "step": 75370 - }, - { - "epoch": 0.6663837762336675, - "grad_norm": 4.266366004943848, - "learning_rate": 3.889360372943888e-05, - "loss": 0.7159, - "step": 75380 - }, - { - "epoch": 0.6664721794939797, - "grad_norm": 3.478417158126831, - "learning_rate": 3.889213034176701e-05, - "loss": 0.8176, - "step": 75390 - }, - { - "epoch": 0.666560582754292, - "grad_norm": 4.912135601043701, - "learning_rate": 3.889065695409514e-05, - "loss": 0.7509, - "step": 75400 - }, - { - "epoch": 0.6666489860146042, - "grad_norm": 6.7844438552856445, - "learning_rate": 3.8889183566423265e-05, - "loss": 0.7294, - "step": 75410 - }, - { - "epoch": 0.6667373892749164, - "grad_norm": 2.0250132083892822, - "learning_rate": 3.8887710178751393e-05, - "loss": 0.6866, - "step": 75420 - }, - { - "epoch": 0.6668257925352287, - "grad_norm": 3.1584935188293457, - "learning_rate": 3.888623679107952e-05, - "loss": 0.5881, - "step": 75430 - }, - { - "epoch": 0.6669141957955409, - "grad_norm": 1.4282203912734985, - "learning_rate": 3.888476340340766e-05, - "loss": 0.5731, - "step": 75440 - }, - { - "epoch": 0.6670025990558531, - "grad_norm": 7.167487621307373, - "learning_rate": 3.8883290015735785e-05, - "loss": 0.7159, - "step": 75450 - }, - { - "epoch": 0.6670910023161655, - "grad_norm": 4.460699558258057, - "learning_rate": 3.8881816628063914e-05, - "loss": 0.625, - "step": 75460 - }, - { - "epoch": 0.6671794055764777, - "grad_norm": 1.3844077587127686, - "learning_rate": 3.888034324039204e-05, - "loss": 0.6857, - "step": 75470 - }, - { - "epoch": 0.6672678088367899, - "grad_norm": 8.716390609741211, - "learning_rate": 3.887886985272017e-05, - "loss": 0.6292, - "step": 75480 - }, - { - "epoch": 0.6673562120971022, - "grad_norm": 4.229059219360352, - "learning_rate": 3.88773964650483e-05, - "loss": 0.6051, - "step": 75490 - }, - { - "epoch": 0.6674446153574144, - "grad_norm": 4.041706085205078, - "learning_rate": 3.8875923077376434e-05, - "loss": 0.731, - "step": 75500 - }, - { - "epoch": 0.6675330186177266, - "grad_norm": 6.528076648712158, - "learning_rate": 3.8874449689704555e-05, - "loss": 0.6065, - "step": 75510 - }, - { - "epoch": 0.6676214218780389, - "grad_norm": 1.0240939855575562, - "learning_rate": 3.887297630203269e-05, - "loss": 0.6556, - "step": 75520 - }, - { - "epoch": 0.6677098251383511, - "grad_norm": 1.8203481435775757, - "learning_rate": 3.887150291436082e-05, - "loss": 0.7946, - "step": 75530 - }, - { - "epoch": 0.6677982283986633, - "grad_norm": 21.99590492248535, - "learning_rate": 3.887002952668895e-05, - "loss": 0.5838, - "step": 75540 - }, - { - "epoch": 0.6678866316589755, - "grad_norm": 2.1583988666534424, - "learning_rate": 3.8868556139017076e-05, - "loss": 0.689, - "step": 75550 - }, - { - "epoch": 0.6679750349192878, - "grad_norm": 2.108670234680176, - "learning_rate": 3.886708275134521e-05, - "loss": 0.5782, - "step": 75560 - }, - { - "epoch": 0.6680634381796, - "grad_norm": 6.648342609405518, - "learning_rate": 3.886560936367333e-05, - "loss": 0.6422, - "step": 75570 - }, - { - "epoch": 0.6681518414399124, - "grad_norm": 4.114011287689209, - "learning_rate": 3.886413597600147e-05, - "loss": 0.7424, - "step": 75580 - }, - { - "epoch": 0.6682402447002246, - "grad_norm": 2.9584524631500244, - "learning_rate": 3.886266258832959e-05, - "loss": 0.6906, - "step": 75590 - }, - { - "epoch": 0.6683286479605368, - "grad_norm": 1.5455824136734009, - "learning_rate": 3.8861189200657724e-05, - "loss": 0.6834, - "step": 75600 - }, - { - "epoch": 0.668417051220849, - "grad_norm": 3.160207986831665, - "learning_rate": 3.885971581298585e-05, - "loss": 0.6911, - "step": 75610 - }, - { - "epoch": 0.6685054544811613, - "grad_norm": 5.8109822273254395, - "learning_rate": 3.885824242531398e-05, - "loss": 0.6734, - "step": 75620 - }, - { - "epoch": 0.6685938577414735, - "grad_norm": 3.8840224742889404, - "learning_rate": 3.885676903764211e-05, - "loss": 0.7482, - "step": 75630 - }, - { - "epoch": 0.6686822610017857, - "grad_norm": 4.629284381866455, - "learning_rate": 3.8855295649970244e-05, - "loss": 0.6803, - "step": 75640 - }, - { - "epoch": 0.668770664262098, - "grad_norm": 6.9976396560668945, - "learning_rate": 3.8853822262298366e-05, - "loss": 0.717, - "step": 75650 - }, - { - "epoch": 0.6688590675224102, - "grad_norm": 3.005126476287842, - "learning_rate": 3.88523488746265e-05, - "loss": 0.6389, - "step": 75660 - }, - { - "epoch": 0.6689474707827224, - "grad_norm": 2.950073003768921, - "learning_rate": 3.885087548695463e-05, - "loss": 0.7223, - "step": 75670 - }, - { - "epoch": 0.6690358740430347, - "grad_norm": 7.066815376281738, - "learning_rate": 3.884940209928276e-05, - "loss": 0.6523, - "step": 75680 - }, - { - "epoch": 0.6691242773033469, - "grad_norm": 1.7473678588867188, - "learning_rate": 3.8847928711610886e-05, - "loss": 0.6794, - "step": 75690 - }, - { - "epoch": 0.6692126805636592, - "grad_norm": 3.887746810913086, - "learning_rate": 3.8846455323939014e-05, - "loss": 0.6326, - "step": 75700 - }, - { - "epoch": 0.6693010838239715, - "grad_norm": 3.1705639362335205, - "learning_rate": 3.884498193626714e-05, - "loss": 0.6652, - "step": 75710 - }, - { - "epoch": 0.6693894870842837, - "grad_norm": 8.686491012573242, - "learning_rate": 3.884350854859528e-05, - "loss": 0.7387, - "step": 75720 - }, - { - "epoch": 0.6694778903445959, - "grad_norm": 0.8560713529586792, - "learning_rate": 3.88420351609234e-05, - "loss": 0.5385, - "step": 75730 - }, - { - "epoch": 0.6695662936049082, - "grad_norm": 0.9194939732551575, - "learning_rate": 3.8840561773251535e-05, - "loss": 0.6358, - "step": 75740 - }, - { - "epoch": 0.6696546968652204, - "grad_norm": 2.2076425552368164, - "learning_rate": 3.883908838557966e-05, - "loss": 0.6917, - "step": 75750 - }, - { - "epoch": 0.6697431001255326, - "grad_norm": 2.779151201248169, - "learning_rate": 3.883761499790779e-05, - "loss": 0.6635, - "step": 75760 - }, - { - "epoch": 0.6698315033858449, - "grad_norm": 3.504073143005371, - "learning_rate": 3.883614161023592e-05, - "loss": 0.5528, - "step": 75770 - }, - { - "epoch": 0.6699199066461571, - "grad_norm": 2.029125452041626, - "learning_rate": 3.8834668222564055e-05, - "loss": 0.639, - "step": 75780 - }, - { - "epoch": 0.6700083099064693, - "grad_norm": 6.529359817504883, - "learning_rate": 3.8833194834892176e-05, - "loss": 0.6525, - "step": 75790 - }, - { - "epoch": 0.6700967131667815, - "grad_norm": 0.9420561790466309, - "learning_rate": 3.883172144722031e-05, - "loss": 0.5379, - "step": 75800 - }, - { - "epoch": 0.6701851164270939, - "grad_norm": 5.292688846588135, - "learning_rate": 3.883024805954843e-05, - "loss": 0.6137, - "step": 75810 - }, - { - "epoch": 0.6702735196874061, - "grad_norm": 3.3520727157592773, - "learning_rate": 3.882877467187657e-05, - "loss": 0.6021, - "step": 75820 - }, - { - "epoch": 0.6703619229477183, - "grad_norm": 1.5326029062271118, - "learning_rate": 3.8827301284204697e-05, - "loss": 0.5959, - "step": 75830 - }, - { - "epoch": 0.6704503262080306, - "grad_norm": 3.817713975906372, - "learning_rate": 3.8825827896532825e-05, - "loss": 0.6685, - "step": 75840 - }, - { - "epoch": 0.6705387294683428, - "grad_norm": 2.552400827407837, - "learning_rate": 3.882435450886095e-05, - "loss": 0.7599, - "step": 75850 - }, - { - "epoch": 0.670627132728655, - "grad_norm": 5.682801246643066, - "learning_rate": 3.882288112118909e-05, - "loss": 0.704, - "step": 75860 - }, - { - "epoch": 0.6707155359889673, - "grad_norm": 2.2050187587738037, - "learning_rate": 3.882140773351721e-05, - "loss": 0.7109, - "step": 75870 - }, - { - "epoch": 0.6708039392492795, - "grad_norm": 3.0417778491973877, - "learning_rate": 3.8819934345845345e-05, - "loss": 0.7757, - "step": 75880 - }, - { - "epoch": 0.6708923425095917, - "grad_norm": 2.154115676879883, - "learning_rate": 3.8818460958173474e-05, - "loss": 0.6801, - "step": 75890 - }, - { - "epoch": 0.670980745769904, - "grad_norm": 4.734131336212158, - "learning_rate": 3.88169875705016e-05, - "loss": 0.6568, - "step": 75900 - }, - { - "epoch": 0.6710691490302162, - "grad_norm": 1.9785854816436768, - "learning_rate": 3.881551418282973e-05, - "loss": 0.6523, - "step": 75910 - }, - { - "epoch": 0.6711575522905284, - "grad_norm": 8.007513999938965, - "learning_rate": 3.8814040795157865e-05, - "loss": 0.8287, - "step": 75920 - }, - { - "epoch": 0.6712459555508408, - "grad_norm": 2.4354846477508545, - "learning_rate": 3.881256740748599e-05, - "loss": 0.6888, - "step": 75930 - }, - { - "epoch": 0.671334358811153, - "grad_norm": 5.224363327026367, - "learning_rate": 3.881109401981412e-05, - "loss": 0.7643, - "step": 75940 - }, - { - "epoch": 0.6714227620714652, - "grad_norm": 1.7246884107589722, - "learning_rate": 3.8809620632142244e-05, - "loss": 0.645, - "step": 75950 - }, - { - "epoch": 0.6715111653317775, - "grad_norm": 2.9604434967041016, - "learning_rate": 3.880814724447038e-05, - "loss": 0.7274, - "step": 75960 - }, - { - "epoch": 0.6715995685920897, - "grad_norm": 2.0970165729522705, - "learning_rate": 3.880667385679851e-05, - "loss": 0.5855, - "step": 75970 - }, - { - "epoch": 0.6716879718524019, - "grad_norm": 3.185828924179077, - "learning_rate": 3.8805200469126635e-05, - "loss": 0.6886, - "step": 75980 - }, - { - "epoch": 0.6717763751127142, - "grad_norm": 4.420489311218262, - "learning_rate": 3.8803727081454764e-05, - "loss": 0.7147, - "step": 75990 - }, - { - "epoch": 0.6718647783730264, - "grad_norm": 6.536433219909668, - "learning_rate": 3.88022536937829e-05, - "loss": 0.66, - "step": 76000 - }, - { - "epoch": 0.6719531816333386, - "grad_norm": 2.0639824867248535, - "learning_rate": 3.880078030611102e-05, - "loss": 0.6225, - "step": 76010 - }, - { - "epoch": 0.6720415848936508, - "grad_norm": 4.866724014282227, - "learning_rate": 3.8799306918439156e-05, - "loss": 0.6275, - "step": 76020 - }, - { - "epoch": 0.6721299881539631, - "grad_norm": 15.086483001708984, - "learning_rate": 3.8797833530767284e-05, - "loss": 0.5561, - "step": 76030 - }, - { - "epoch": 0.6722183914142753, - "grad_norm": 20.65117645263672, - "learning_rate": 3.879636014309541e-05, - "loss": 0.5579, - "step": 76040 - }, - { - "epoch": 0.6723067946745876, - "grad_norm": 1.189677357673645, - "learning_rate": 3.879488675542354e-05, - "loss": 0.7201, - "step": 76050 - }, - { - "epoch": 0.6723951979348999, - "grad_norm": 2.507047414779663, - "learning_rate": 3.879341336775167e-05, - "loss": 0.7802, - "step": 76060 - }, - { - "epoch": 0.6724836011952121, - "grad_norm": 5.809626579284668, - "learning_rate": 3.87919399800798e-05, - "loss": 0.6804, - "step": 76070 - }, - { - "epoch": 0.6725720044555243, - "grad_norm": 4.820792198181152, - "learning_rate": 3.879046659240793e-05, - "loss": 0.5915, - "step": 76080 - }, - { - "epoch": 0.6726604077158366, - "grad_norm": 1.4262391328811646, - "learning_rate": 3.8788993204736054e-05, - "loss": 0.78, - "step": 76090 - }, - { - "epoch": 0.6727488109761488, - "grad_norm": 5.81444787979126, - "learning_rate": 3.878751981706419e-05, - "loss": 0.616, - "step": 76100 - }, - { - "epoch": 0.672837214236461, - "grad_norm": 10.396108627319336, - "learning_rate": 3.878604642939232e-05, - "loss": 0.743, - "step": 76110 - }, - { - "epoch": 0.6729256174967733, - "grad_norm": 3.6961781978607178, - "learning_rate": 3.8784573041720446e-05, - "loss": 0.5824, - "step": 76120 - }, - { - "epoch": 0.6730140207570855, - "grad_norm": 5.650369167327881, - "learning_rate": 3.8783099654048574e-05, - "loss": 0.8155, - "step": 76130 - }, - { - "epoch": 0.6731024240173977, - "grad_norm": 8.80875015258789, - "learning_rate": 3.878162626637671e-05, - "loss": 0.7756, - "step": 76140 - }, - { - "epoch": 0.67319082727771, - "grad_norm": 5.947559833526611, - "learning_rate": 3.878015287870483e-05, - "loss": 0.737, - "step": 76150 - }, - { - "epoch": 0.6732792305380222, - "grad_norm": 13.889673233032227, - "learning_rate": 3.8778679491032966e-05, - "loss": 0.6277, - "step": 76160 - }, - { - "epoch": 0.6733676337983345, - "grad_norm": 4.561303615570068, - "learning_rate": 3.8777206103361095e-05, - "loss": 0.5821, - "step": 76170 - }, - { - "epoch": 0.6734560370586468, - "grad_norm": 1.242016315460205, - "learning_rate": 3.877573271568922e-05, - "loss": 0.5489, - "step": 76180 - }, - { - "epoch": 0.673544440318959, - "grad_norm": 4.189915180206299, - "learning_rate": 3.877425932801735e-05, - "loss": 0.7306, - "step": 76190 - }, - { - "epoch": 0.6736328435792712, - "grad_norm": 6.442260265350342, - "learning_rate": 3.877278594034548e-05, - "loss": 0.5927, - "step": 76200 - }, - { - "epoch": 0.6737212468395835, - "grad_norm": 6.389708518981934, - "learning_rate": 3.877131255267361e-05, - "loss": 0.6212, - "step": 76210 - }, - { - "epoch": 0.6738096500998957, - "grad_norm": 8.660594940185547, - "learning_rate": 3.876983916500174e-05, - "loss": 0.6111, - "step": 76220 - }, - { - "epoch": 0.6738980533602079, - "grad_norm": 2.4078876972198486, - "learning_rate": 3.876836577732987e-05, - "loss": 0.7935, - "step": 76230 - }, - { - "epoch": 0.6739864566205201, - "grad_norm": 1.8436461687088013, - "learning_rate": 3.8766892389658e-05, - "loss": 0.7178, - "step": 76240 - }, - { - "epoch": 0.6740748598808324, - "grad_norm": 1.7624011039733887, - "learning_rate": 3.876541900198613e-05, - "loss": 0.6913, - "step": 76250 - }, - { - "epoch": 0.6741632631411446, - "grad_norm": 3.4612741470336914, - "learning_rate": 3.8763945614314256e-05, - "loss": 0.7263, - "step": 76260 - }, - { - "epoch": 0.6742516664014568, - "grad_norm": 6.166719913482666, - "learning_rate": 3.8762472226642385e-05, - "loss": 0.7612, - "step": 76270 - }, - { - "epoch": 0.6743400696617692, - "grad_norm": 3.175039529800415, - "learning_rate": 3.876099883897051e-05, - "loss": 0.7465, - "step": 76280 - }, - { - "epoch": 0.6744284729220814, - "grad_norm": 2.4754281044006348, - "learning_rate": 3.875952545129865e-05, - "loss": 0.6401, - "step": 76290 - }, - { - "epoch": 0.6745168761823936, - "grad_norm": 4.258701801300049, - "learning_rate": 3.875805206362678e-05, - "loss": 0.7304, - "step": 76300 - }, - { - "epoch": 0.6746052794427059, - "grad_norm": 5.60132360458374, - "learning_rate": 3.8756578675954905e-05, - "loss": 0.65, - "step": 76310 - }, - { - "epoch": 0.6746936827030181, - "grad_norm": 3.512701988220215, - "learning_rate": 3.875510528828303e-05, - "loss": 0.6317, - "step": 76320 - }, - { - "epoch": 0.6747820859633303, - "grad_norm": 2.3437705039978027, - "learning_rate": 3.875363190061116e-05, - "loss": 0.6583, - "step": 76330 - }, - { - "epoch": 0.6748704892236426, - "grad_norm": 1.687526822090149, - "learning_rate": 3.875215851293929e-05, - "loss": 0.7704, - "step": 76340 - }, - { - "epoch": 0.6749588924839548, - "grad_norm": 11.453084945678711, - "learning_rate": 3.8750685125267425e-05, - "loss": 0.6987, - "step": 76350 - }, - { - "epoch": 0.675047295744267, - "grad_norm": 1.4400664567947388, - "learning_rate": 3.8749211737595554e-05, - "loss": 0.7892, - "step": 76360 - }, - { - "epoch": 0.6751356990045793, - "grad_norm": 2.6637187004089355, - "learning_rate": 3.874773834992368e-05, - "loss": 0.6595, - "step": 76370 - }, - { - "epoch": 0.6752241022648915, - "grad_norm": 3.611447334289551, - "learning_rate": 3.874626496225181e-05, - "loss": 0.7128, - "step": 76380 - }, - { - "epoch": 0.6753125055252037, - "grad_norm": 1.8053510189056396, - "learning_rate": 3.874479157457994e-05, - "loss": 0.558, - "step": 76390 - }, - { - "epoch": 0.6754009087855161, - "grad_norm": 8.580409049987793, - "learning_rate": 3.874331818690807e-05, - "loss": 0.646, - "step": 76400 - }, - { - "epoch": 0.6754893120458283, - "grad_norm": 2.5508487224578857, - "learning_rate": 3.87418447992362e-05, - "loss": 0.6681, - "step": 76410 - }, - { - "epoch": 0.6755777153061405, - "grad_norm": 4.451391220092773, - "learning_rate": 3.8740371411564324e-05, - "loss": 0.6011, - "step": 76420 - }, - { - "epoch": 0.6756661185664528, - "grad_norm": 1.0979185104370117, - "learning_rate": 3.873889802389246e-05, - "loss": 0.8827, - "step": 76430 - }, - { - "epoch": 0.675754521826765, - "grad_norm": 3.342114210128784, - "learning_rate": 3.873742463622059e-05, - "loss": 0.657, - "step": 76440 - }, - { - "epoch": 0.6758429250870772, - "grad_norm": 4.40913200378418, - "learning_rate": 3.8735951248548716e-05, - "loss": 0.6716, - "step": 76450 - }, - { - "epoch": 0.6759313283473894, - "grad_norm": 2.9905641078948975, - "learning_rate": 3.8734477860876844e-05, - "loss": 0.6634, - "step": 76460 - }, - { - "epoch": 0.6760197316077017, - "grad_norm": 3.614109754562378, - "learning_rate": 3.873300447320498e-05, - "loss": 0.6161, - "step": 76470 - }, - { - "epoch": 0.6761081348680139, - "grad_norm": 13.181926727294922, - "learning_rate": 3.87315310855331e-05, - "loss": 0.6864, - "step": 76480 - }, - { - "epoch": 0.6761965381283261, - "grad_norm": 2.6683850288391113, - "learning_rate": 3.8730057697861236e-05, - "loss": 0.6879, - "step": 76490 - }, - { - "epoch": 0.6762849413886384, - "grad_norm": 1.5223404169082642, - "learning_rate": 3.8728584310189364e-05, - "loss": 0.6355, - "step": 76500 - }, - { - "epoch": 0.6763733446489506, - "grad_norm": 3.8239762783050537, - "learning_rate": 3.872711092251749e-05, - "loss": 0.6753, - "step": 76510 - }, - { - "epoch": 0.676461747909263, - "grad_norm": 5.135126113891602, - "learning_rate": 3.872563753484562e-05, - "loss": 0.7254, - "step": 76520 - }, - { - "epoch": 0.6765501511695752, - "grad_norm": 2.5574920177459717, - "learning_rate": 3.872416414717375e-05, - "loss": 0.5601, - "step": 76530 - }, - { - "epoch": 0.6766385544298874, - "grad_norm": 4.815797328948975, - "learning_rate": 3.872269075950188e-05, - "loss": 0.5536, - "step": 76540 - }, - { - "epoch": 0.6767269576901996, - "grad_norm": 5.155397891998291, - "learning_rate": 3.872121737183001e-05, - "loss": 0.7275, - "step": 76550 - }, - { - "epoch": 0.6768153609505119, - "grad_norm": 2.6984620094299316, - "learning_rate": 3.8719743984158134e-05, - "loss": 0.6707, - "step": 76560 - }, - { - "epoch": 0.6769037642108241, - "grad_norm": 3.107577085494995, - "learning_rate": 3.871827059648627e-05, - "loss": 0.746, - "step": 76570 - }, - { - "epoch": 0.6769921674711363, - "grad_norm": 2.2539663314819336, - "learning_rate": 3.87167972088144e-05, - "loss": 0.8251, - "step": 76580 - }, - { - "epoch": 0.6770805707314486, - "grad_norm": 2.646031141281128, - "learning_rate": 3.8715323821142526e-05, - "loss": 0.7333, - "step": 76590 - }, - { - "epoch": 0.6771689739917608, - "grad_norm": 1.3493362665176392, - "learning_rate": 3.8713850433470654e-05, - "loss": 0.6736, - "step": 76600 - }, - { - "epoch": 0.677257377252073, - "grad_norm": 2.064878225326538, - "learning_rate": 3.871237704579879e-05, - "loss": 0.5603, - "step": 76610 - }, - { - "epoch": 0.6773457805123853, - "grad_norm": 1.6407034397125244, - "learning_rate": 3.871090365812691e-05, - "loss": 0.6618, - "step": 76620 - }, - { - "epoch": 0.6774341837726975, - "grad_norm": 7.08697509765625, - "learning_rate": 3.8709430270455046e-05, - "loss": 0.6694, - "step": 76630 - }, - { - "epoch": 0.6775225870330098, - "grad_norm": 1.1913646459579468, - "learning_rate": 3.870795688278317e-05, - "loss": 0.6065, - "step": 76640 - }, - { - "epoch": 0.6776109902933221, - "grad_norm": 4.2500386238098145, - "learning_rate": 3.87064834951113e-05, - "loss": 0.706, - "step": 76650 - }, - { - "epoch": 0.6776993935536343, - "grad_norm": 4.372733116149902, - "learning_rate": 3.870501010743943e-05, - "loss": 0.8389, - "step": 76660 - }, - { - "epoch": 0.6777877968139465, - "grad_norm": 2.3600168228149414, - "learning_rate": 3.870353671976756e-05, - "loss": 0.6175, - "step": 76670 - }, - { - "epoch": 0.6778762000742588, - "grad_norm": 7.481557846069336, - "learning_rate": 3.870206333209569e-05, - "loss": 0.6869, - "step": 76680 - }, - { - "epoch": 0.677964603334571, - "grad_norm": 1.741132378578186, - "learning_rate": 3.870058994442382e-05, - "loss": 0.6602, - "step": 76690 - }, - { - "epoch": 0.6780530065948832, - "grad_norm": 1.6569914817810059, - "learning_rate": 3.8699116556751945e-05, - "loss": 0.754, - "step": 76700 - }, - { - "epoch": 0.6781414098551954, - "grad_norm": 3.491014003753662, - "learning_rate": 3.869764316908008e-05, - "loss": 0.6877, - "step": 76710 - }, - { - "epoch": 0.6782298131155077, - "grad_norm": 1.5371977090835571, - "learning_rate": 3.869616978140821e-05, - "loss": 0.5571, - "step": 76720 - }, - { - "epoch": 0.6783182163758199, - "grad_norm": 6.5803399085998535, - "learning_rate": 3.8694696393736337e-05, - "loss": 0.698, - "step": 76730 - }, - { - "epoch": 0.6784066196361321, - "grad_norm": 2.120131492614746, - "learning_rate": 3.8693223006064465e-05, - "loss": 0.714, - "step": 76740 - }, - { - "epoch": 0.6784950228964444, - "grad_norm": 8.216854095458984, - "learning_rate": 3.86917496183926e-05, - "loss": 0.7117, - "step": 76750 - }, - { - "epoch": 0.6785834261567567, - "grad_norm": 3.822622299194336, - "learning_rate": 3.869027623072072e-05, - "loss": 0.7071, - "step": 76760 - }, - { - "epoch": 0.6786718294170689, - "grad_norm": 3.3949902057647705, - "learning_rate": 3.868880284304886e-05, - "loss": 0.5266, - "step": 76770 - }, - { - "epoch": 0.6787602326773812, - "grad_norm": 3.3918614387512207, - "learning_rate": 3.868732945537698e-05, - "loss": 0.5577, - "step": 76780 - }, - { - "epoch": 0.6788486359376934, - "grad_norm": 7.679214000701904, - "learning_rate": 3.8685856067705113e-05, - "loss": 0.8213, - "step": 76790 - }, - { - "epoch": 0.6789370391980056, - "grad_norm": 3.390754461288452, - "learning_rate": 3.868438268003324e-05, - "loss": 0.6292, - "step": 76800 - }, - { - "epoch": 0.6790254424583179, - "grad_norm": 5.8655686378479, - "learning_rate": 3.868290929236137e-05, - "loss": 0.6511, - "step": 76810 - }, - { - "epoch": 0.6791138457186301, - "grad_norm": 13.011842727661133, - "learning_rate": 3.86814359046895e-05, - "loss": 0.734, - "step": 76820 - }, - { - "epoch": 0.6792022489789423, - "grad_norm": 1.3025916814804077, - "learning_rate": 3.8679962517017634e-05, - "loss": 0.6747, - "step": 76830 - }, - { - "epoch": 0.6792906522392546, - "grad_norm": 14.641339302062988, - "learning_rate": 3.8678489129345755e-05, - "loss": 0.6175, - "step": 76840 - }, - { - "epoch": 0.6793790554995668, - "grad_norm": 1.6169750690460205, - "learning_rate": 3.867701574167389e-05, - "loss": 0.6741, - "step": 76850 - }, - { - "epoch": 0.679467458759879, - "grad_norm": 3.9084529876708984, - "learning_rate": 3.867554235400202e-05, - "loss": 0.6247, - "step": 76860 - }, - { - "epoch": 0.6795558620201914, - "grad_norm": 1.8821941614151, - "learning_rate": 3.867406896633015e-05, - "loss": 0.6401, - "step": 76870 - }, - { - "epoch": 0.6796442652805036, - "grad_norm": 3.509068250656128, - "learning_rate": 3.8672595578658275e-05, - "loss": 0.5333, - "step": 76880 - }, - { - "epoch": 0.6797326685408158, - "grad_norm": 13.12830638885498, - "learning_rate": 3.8671122190986404e-05, - "loss": 0.686, - "step": 76890 - }, - { - "epoch": 0.679821071801128, - "grad_norm": 2.8814942836761475, - "learning_rate": 3.866964880331453e-05, - "loss": 0.6663, - "step": 76900 - }, - { - "epoch": 0.6799094750614403, - "grad_norm": 6.208309173583984, - "learning_rate": 3.866817541564267e-05, - "loss": 0.6331, - "step": 76910 - }, - { - "epoch": 0.6799978783217525, - "grad_norm": 1.1584899425506592, - "learning_rate": 3.866670202797079e-05, - "loss": 0.6199, - "step": 76920 - }, - { - "epoch": 0.6800862815820647, - "grad_norm": 1.685923457145691, - "learning_rate": 3.8665228640298924e-05, - "loss": 0.6051, - "step": 76930 - }, - { - "epoch": 0.680174684842377, - "grad_norm": 3.3316476345062256, - "learning_rate": 3.866375525262705e-05, - "loss": 0.7501, - "step": 76940 - }, - { - "epoch": 0.6802630881026892, - "grad_norm": 1.666646122932434, - "learning_rate": 3.866228186495518e-05, - "loss": 0.5034, - "step": 76950 - }, - { - "epoch": 0.6803514913630014, - "grad_norm": 3.861109495162964, - "learning_rate": 3.866080847728331e-05, - "loss": 0.6992, - "step": 76960 - }, - { - "epoch": 0.6804398946233137, - "grad_norm": 4.303036689758301, - "learning_rate": 3.8659335089611444e-05, - "loss": 0.6443, - "step": 76970 - }, - { - "epoch": 0.6805282978836259, - "grad_norm": 7.868462562561035, - "learning_rate": 3.8657861701939566e-05, - "loss": 0.6491, - "step": 76980 - }, - { - "epoch": 0.6806167011439382, - "grad_norm": 4.3306450843811035, - "learning_rate": 3.86563883142677e-05, - "loss": 0.663, - "step": 76990 - }, - { - "epoch": 0.6807051044042505, - "grad_norm": 2.773069143295288, - "learning_rate": 3.865491492659582e-05, - "loss": 0.7513, - "step": 77000 - }, - { - "epoch": 0.6807935076645627, - "grad_norm": 2.352480173110962, - "learning_rate": 3.865344153892396e-05, - "loss": 0.6102, - "step": 77010 - }, - { - "epoch": 0.6808819109248749, - "grad_norm": 2.846865177154541, - "learning_rate": 3.8651968151252086e-05, - "loss": 0.7337, - "step": 77020 - }, - { - "epoch": 0.6809703141851872, - "grad_norm": 2.99969744682312, - "learning_rate": 3.8650494763580214e-05, - "loss": 0.7258, - "step": 77030 - }, - { - "epoch": 0.6810587174454994, - "grad_norm": 3.2447099685668945, - "learning_rate": 3.864902137590834e-05, - "loss": 0.7632, - "step": 77040 - }, - { - "epoch": 0.6811471207058116, - "grad_norm": 1.6464154720306396, - "learning_rate": 3.864754798823648e-05, - "loss": 0.5715, - "step": 77050 - }, - { - "epoch": 0.6812355239661239, - "grad_norm": 6.3425188064575195, - "learning_rate": 3.86460746005646e-05, - "loss": 0.786, - "step": 77060 - }, - { - "epoch": 0.6813239272264361, - "grad_norm": 9.601325988769531, - "learning_rate": 3.8644601212892734e-05, - "loss": 0.5505, - "step": 77070 - }, - { - "epoch": 0.6814123304867483, - "grad_norm": 4.141589641571045, - "learning_rate": 3.864312782522086e-05, - "loss": 0.7429, - "step": 77080 - }, - { - "epoch": 0.6815007337470605, - "grad_norm": 7.141766548156738, - "learning_rate": 3.864165443754899e-05, - "loss": 0.6463, - "step": 77090 - }, - { - "epoch": 0.6815891370073728, - "grad_norm": 3.5620219707489014, - "learning_rate": 3.864018104987712e-05, - "loss": 0.8364, - "step": 77100 - }, - { - "epoch": 0.6816775402676851, - "grad_norm": 2.0552573204040527, - "learning_rate": 3.863870766220525e-05, - "loss": 0.7667, - "step": 77110 - }, - { - "epoch": 0.6817659435279974, - "grad_norm": 2.395127773284912, - "learning_rate": 3.8637234274533376e-05, - "loss": 0.5508, - "step": 77120 - }, - { - "epoch": 0.6818543467883096, - "grad_norm": 5.132692813873291, - "learning_rate": 3.863576088686151e-05, - "loss": 0.7267, - "step": 77130 - }, - { - "epoch": 0.6819427500486218, - "grad_norm": 9.271647453308105, - "learning_rate": 3.863428749918964e-05, - "loss": 0.6817, - "step": 77140 - }, - { - "epoch": 0.682031153308934, - "grad_norm": 4.211577892303467, - "learning_rate": 3.863281411151777e-05, - "loss": 0.6936, - "step": 77150 - }, - { - "epoch": 0.6821195565692463, - "grad_norm": 5.926328659057617, - "learning_rate": 3.8631340723845896e-05, - "loss": 0.6266, - "step": 77160 - }, - { - "epoch": 0.6822079598295585, - "grad_norm": 9.586467742919922, - "learning_rate": 3.8629867336174025e-05, - "loss": 0.5502, - "step": 77170 - }, - { - "epoch": 0.6822963630898707, - "grad_norm": 3.0919978618621826, - "learning_rate": 3.862839394850215e-05, - "loss": 0.6783, - "step": 77180 - }, - { - "epoch": 0.682384766350183, - "grad_norm": 11.795863151550293, - "learning_rate": 3.862692056083029e-05, - "loss": 0.6815, - "step": 77190 - }, - { - "epoch": 0.6824731696104952, - "grad_norm": 2.4316375255584717, - "learning_rate": 3.862544717315842e-05, - "loss": 0.763, - "step": 77200 - }, - { - "epoch": 0.6825615728708074, - "grad_norm": 6.4402923583984375, - "learning_rate": 3.8623973785486545e-05, - "loss": 0.746, - "step": 77210 - }, - { - "epoch": 0.6826499761311197, - "grad_norm": 3.1197142601013184, - "learning_rate": 3.862250039781467e-05, - "loss": 0.6944, - "step": 77220 - }, - { - "epoch": 0.682738379391432, - "grad_norm": 2.88101863861084, - "learning_rate": 3.86210270101428e-05, - "loss": 0.6582, - "step": 77230 - }, - { - "epoch": 0.6828267826517442, - "grad_norm": 2.6732773780822754, - "learning_rate": 3.861955362247093e-05, - "loss": 0.6823, - "step": 77240 - }, - { - "epoch": 0.6829151859120565, - "grad_norm": 2.0981602668762207, - "learning_rate": 3.861808023479906e-05, - "loss": 0.6391, - "step": 77250 - }, - { - "epoch": 0.6830035891723687, - "grad_norm": 1.3534191846847534, - "learning_rate": 3.8616606847127194e-05, - "loss": 0.6871, - "step": 77260 - }, - { - "epoch": 0.6830919924326809, - "grad_norm": 6.169220447540283, - "learning_rate": 3.861513345945532e-05, - "loss": 0.7267, - "step": 77270 - }, - { - "epoch": 0.6831803956929932, - "grad_norm": 5.3853678703308105, - "learning_rate": 3.861366007178345e-05, - "loss": 0.6419, - "step": 77280 - }, - { - "epoch": 0.6832687989533054, - "grad_norm": 2.234717845916748, - "learning_rate": 3.861218668411158e-05, - "loss": 0.6934, - "step": 77290 - }, - { - "epoch": 0.6833572022136176, - "grad_norm": 3.117443323135376, - "learning_rate": 3.861071329643971e-05, - "loss": 0.7977, - "step": 77300 - }, - { - "epoch": 0.6834456054739299, - "grad_norm": 1.4964114427566528, - "learning_rate": 3.8609239908767835e-05, - "loss": 0.6945, - "step": 77310 - }, - { - "epoch": 0.6835340087342421, - "grad_norm": 2.7582192420959473, - "learning_rate": 3.860776652109597e-05, - "loss": 0.6983, - "step": 77320 - }, - { - "epoch": 0.6836224119945543, - "grad_norm": 2.0405874252319336, - "learning_rate": 3.86062931334241e-05, - "loss": 0.6828, - "step": 77330 - }, - { - "epoch": 0.6837108152548665, - "grad_norm": 1.4996368885040283, - "learning_rate": 3.860481974575223e-05, - "loss": 0.7164, - "step": 77340 - }, - { - "epoch": 0.6837992185151789, - "grad_norm": 2.466998338699341, - "learning_rate": 3.8603346358080355e-05, - "loss": 0.8637, - "step": 77350 - }, - { - "epoch": 0.6838876217754911, - "grad_norm": 7.78523588180542, - "learning_rate": 3.8601872970408484e-05, - "loss": 0.6814, - "step": 77360 - }, - { - "epoch": 0.6839760250358033, - "grad_norm": 1.4570581912994385, - "learning_rate": 3.860039958273661e-05, - "loss": 0.6079, - "step": 77370 - }, - { - "epoch": 0.6840644282961156, - "grad_norm": 8.560462951660156, - "learning_rate": 3.859892619506475e-05, - "loss": 0.557, - "step": 77380 - }, - { - "epoch": 0.6841528315564278, - "grad_norm": 7.949138164520264, - "learning_rate": 3.859745280739287e-05, - "loss": 0.7307, - "step": 77390 - }, - { - "epoch": 0.68424123481674, - "grad_norm": 1.9432971477508545, - "learning_rate": 3.8595979419721004e-05, - "loss": 0.6246, - "step": 77400 - }, - { - "epoch": 0.6843296380770523, - "grad_norm": 2.5344889163970947, - "learning_rate": 3.859450603204913e-05, - "loss": 0.7123, - "step": 77410 - }, - { - "epoch": 0.6844180413373645, - "grad_norm": 5.848361492156982, - "learning_rate": 3.859303264437726e-05, - "loss": 0.6575, - "step": 77420 - }, - { - "epoch": 0.6845064445976767, - "grad_norm": 2.0107686519622803, - "learning_rate": 3.859155925670539e-05, - "loss": 0.5882, - "step": 77430 - }, - { - "epoch": 0.684594847857989, - "grad_norm": 2.0502915382385254, - "learning_rate": 3.8590085869033524e-05, - "loss": 0.7121, - "step": 77440 - }, - { - "epoch": 0.6846832511183012, - "grad_norm": 3.6624529361724854, - "learning_rate": 3.8588612481361646e-05, - "loss": 0.5824, - "step": 77450 - }, - { - "epoch": 0.6847716543786135, - "grad_norm": 6.3490891456604, - "learning_rate": 3.858713909368978e-05, - "loss": 0.5814, - "step": 77460 - }, - { - "epoch": 0.6848600576389258, - "grad_norm": 2.2851083278656006, - "learning_rate": 3.85856657060179e-05, - "loss": 0.6769, - "step": 77470 - }, - { - "epoch": 0.684948460899238, - "grad_norm": 1.1894772052764893, - "learning_rate": 3.858419231834604e-05, - "loss": 0.5305, - "step": 77480 - }, - { - "epoch": 0.6850368641595502, - "grad_norm": 1.4727857112884521, - "learning_rate": 3.8582718930674166e-05, - "loss": 0.6348, - "step": 77490 - }, - { - "epoch": 0.6851252674198625, - "grad_norm": 5.069452285766602, - "learning_rate": 3.8581245543002294e-05, - "loss": 0.6586, - "step": 77500 - }, - { - "epoch": 0.6852136706801747, - "grad_norm": 1.811840295791626, - "learning_rate": 3.857977215533042e-05, - "loss": 0.6417, - "step": 77510 - }, - { - "epoch": 0.6853020739404869, - "grad_norm": 1.2616230249404907, - "learning_rate": 3.857829876765856e-05, - "loss": 0.5941, - "step": 77520 - }, - { - "epoch": 0.6853904772007992, - "grad_norm": 1.8622196912765503, - "learning_rate": 3.857682537998668e-05, - "loss": 0.7069, - "step": 77530 - }, - { - "epoch": 0.6854788804611114, - "grad_norm": 3.8690836429595947, - "learning_rate": 3.8575351992314815e-05, - "loss": 0.6264, - "step": 77540 - }, - { - "epoch": 0.6855672837214236, - "grad_norm": 2.098522424697876, - "learning_rate": 3.857387860464294e-05, - "loss": 0.6378, - "step": 77550 - }, - { - "epoch": 0.6856556869817358, - "grad_norm": 1.5568214654922485, - "learning_rate": 3.857240521697107e-05, - "loss": 0.723, - "step": 77560 - }, - { - "epoch": 0.6857440902420481, - "grad_norm": 3.2480857372283936, - "learning_rate": 3.85709318292992e-05, - "loss": 0.6516, - "step": 77570 - }, - { - "epoch": 0.6858324935023604, - "grad_norm": 1.9615591764450073, - "learning_rate": 3.856945844162733e-05, - "loss": 0.7321, - "step": 77580 - }, - { - "epoch": 0.6859208967626726, - "grad_norm": 3.882704734802246, - "learning_rate": 3.8567985053955456e-05, - "loss": 0.6662, - "step": 77590 - }, - { - "epoch": 0.6860093000229849, - "grad_norm": 3.7366836071014404, - "learning_rate": 3.856651166628359e-05, - "loss": 0.7001, - "step": 77600 - }, - { - "epoch": 0.6860977032832971, - "grad_norm": 5.924266338348389, - "learning_rate": 3.856503827861171e-05, - "loss": 0.7357, - "step": 77610 - }, - { - "epoch": 0.6861861065436093, - "grad_norm": 2.591278076171875, - "learning_rate": 3.856356489093985e-05, - "loss": 0.5735, - "step": 77620 - }, - { - "epoch": 0.6862745098039216, - "grad_norm": 4.934967041015625, - "learning_rate": 3.8562091503267977e-05, - "loss": 0.6293, - "step": 77630 - }, - { - "epoch": 0.6863629130642338, - "grad_norm": 6.791815757751465, - "learning_rate": 3.8560618115596105e-05, - "loss": 0.6967, - "step": 77640 - }, - { - "epoch": 0.686451316324546, - "grad_norm": 3.148416042327881, - "learning_rate": 3.855914472792423e-05, - "loss": 0.655, - "step": 77650 - }, - { - "epoch": 0.6865397195848583, - "grad_norm": 3.7165634632110596, - "learning_rate": 3.855767134025237e-05, - "loss": 0.8598, - "step": 77660 - }, - { - "epoch": 0.6866281228451705, - "grad_norm": 8.821511268615723, - "learning_rate": 3.855619795258049e-05, - "loss": 0.6967, - "step": 77670 - }, - { - "epoch": 0.6867165261054827, - "grad_norm": 6.567383766174316, - "learning_rate": 3.8554724564908625e-05, - "loss": 0.7457, - "step": 77680 - }, - { - "epoch": 0.686804929365795, - "grad_norm": 6.200684547424316, - "learning_rate": 3.8553251177236753e-05, - "loss": 0.6479, - "step": 77690 - }, - { - "epoch": 0.6868933326261073, - "grad_norm": 4.559550762176514, - "learning_rate": 3.855177778956488e-05, - "loss": 0.7718, - "step": 77700 - }, - { - "epoch": 0.6869817358864195, - "grad_norm": 2.284128427505493, - "learning_rate": 3.855030440189301e-05, - "loss": 0.7383, - "step": 77710 - }, - { - "epoch": 0.6870701391467318, - "grad_norm": 6.826602458953857, - "learning_rate": 3.854883101422114e-05, - "loss": 0.7224, - "step": 77720 - }, - { - "epoch": 0.687158542407044, - "grad_norm": 3.140498638153076, - "learning_rate": 3.854735762654927e-05, - "loss": 0.6355, - "step": 77730 - }, - { - "epoch": 0.6872469456673562, - "grad_norm": 4.787046909332275, - "learning_rate": 3.85458842388774e-05, - "loss": 0.5156, - "step": 77740 - }, - { - "epoch": 0.6873353489276685, - "grad_norm": 0.96241295337677, - "learning_rate": 3.8544410851205524e-05, - "loss": 0.6055, - "step": 77750 - }, - { - "epoch": 0.6874237521879807, - "grad_norm": 1.3984150886535645, - "learning_rate": 3.854293746353366e-05, - "loss": 0.5456, - "step": 77760 - }, - { - "epoch": 0.6875121554482929, - "grad_norm": 2.450329065322876, - "learning_rate": 3.854146407586179e-05, - "loss": 0.7398, - "step": 77770 - }, - { - "epoch": 0.6876005587086051, - "grad_norm": 7.138993263244629, - "learning_rate": 3.8539990688189915e-05, - "loss": 0.6696, - "step": 77780 - }, - { - "epoch": 0.6876889619689174, - "grad_norm": 2.7971534729003906, - "learning_rate": 3.8538517300518044e-05, - "loss": 0.604, - "step": 77790 - }, - { - "epoch": 0.6877773652292296, - "grad_norm": 2.738062858581543, - "learning_rate": 3.853704391284618e-05, - "loss": 0.6613, - "step": 77800 - }, - { - "epoch": 0.6878657684895418, - "grad_norm": 6.439538478851318, - "learning_rate": 3.85355705251743e-05, - "loss": 0.6827, - "step": 77810 - }, - { - "epoch": 0.6879541717498542, - "grad_norm": 2.194342613220215, - "learning_rate": 3.8534097137502436e-05, - "loss": 0.6879, - "step": 77820 - }, - { - "epoch": 0.6880425750101664, - "grad_norm": 4.100100994110107, - "learning_rate": 3.853262374983056e-05, - "loss": 0.4775, - "step": 77830 - }, - { - "epoch": 0.6881309782704786, - "grad_norm": 1.8053207397460938, - "learning_rate": 3.853115036215869e-05, - "loss": 0.847, - "step": 77840 - }, - { - "epoch": 0.6882193815307909, - "grad_norm": 7.048489570617676, - "learning_rate": 3.852967697448682e-05, - "loss": 0.6523, - "step": 77850 - }, - { - "epoch": 0.6883077847911031, - "grad_norm": 14.310152053833008, - "learning_rate": 3.852820358681495e-05, - "loss": 0.7075, - "step": 77860 - }, - { - "epoch": 0.6883961880514153, - "grad_norm": 6.7928009033203125, - "learning_rate": 3.852673019914308e-05, - "loss": 0.6874, - "step": 77870 - }, - { - "epoch": 0.6884845913117276, - "grad_norm": 3.396361827850342, - "learning_rate": 3.852525681147121e-05, - "loss": 0.6903, - "step": 77880 - }, - { - "epoch": 0.6885729945720398, - "grad_norm": 2.094087839126587, - "learning_rate": 3.8523783423799334e-05, - "loss": 0.606, - "step": 77890 - }, - { - "epoch": 0.688661397832352, - "grad_norm": 2.478360176086426, - "learning_rate": 3.852231003612747e-05, - "loss": 0.5834, - "step": 77900 - }, - { - "epoch": 0.6887498010926643, - "grad_norm": 4.949512958526611, - "learning_rate": 3.85208366484556e-05, - "loss": 0.6903, - "step": 77910 - }, - { - "epoch": 0.6888382043529765, - "grad_norm": 9.530298233032227, - "learning_rate": 3.8519363260783726e-05, - "loss": 0.7446, - "step": 77920 - }, - { - "epoch": 0.6889266076132888, - "grad_norm": 4.61517858505249, - "learning_rate": 3.8517889873111854e-05, - "loss": 0.6037, - "step": 77930 - }, - { - "epoch": 0.6890150108736011, - "grad_norm": 3.588691234588623, - "learning_rate": 3.851641648543998e-05, - "loss": 0.7105, - "step": 77940 - }, - { - "epoch": 0.6891034141339133, - "grad_norm": 2.0319581031799316, - "learning_rate": 3.851494309776811e-05, - "loss": 0.6975, - "step": 77950 - }, - { - "epoch": 0.6891918173942255, - "grad_norm": 4.805972576141357, - "learning_rate": 3.8513469710096246e-05, - "loss": 0.6088, - "step": 77960 - }, - { - "epoch": 0.6892802206545378, - "grad_norm": 5.29019832611084, - "learning_rate": 3.851199632242437e-05, - "loss": 0.696, - "step": 77970 - }, - { - "epoch": 0.68936862391485, - "grad_norm": 3.0730087757110596, - "learning_rate": 3.85105229347525e-05, - "loss": 0.6287, - "step": 77980 - }, - { - "epoch": 0.6894570271751622, - "grad_norm": 3.155669689178467, - "learning_rate": 3.850904954708063e-05, - "loss": 0.567, - "step": 77990 - }, - { - "epoch": 0.6895454304354744, - "grad_norm": 1.2867512702941895, - "learning_rate": 3.850757615940876e-05, - "loss": 0.6755, - "step": 78000 - }, - { - "epoch": 0.6896338336957867, - "grad_norm": 3.9351489543914795, - "learning_rate": 3.850610277173689e-05, - "loss": 0.8449, - "step": 78010 - }, - { - "epoch": 0.6897222369560989, - "grad_norm": 7.071952819824219, - "learning_rate": 3.850462938406502e-05, - "loss": 0.6631, - "step": 78020 - }, - { - "epoch": 0.6898106402164111, - "grad_norm": 1.500244140625, - "learning_rate": 3.8503155996393145e-05, - "loss": 0.7634, - "step": 78030 - }, - { - "epoch": 0.6898990434767234, - "grad_norm": 3.5885379314422607, - "learning_rate": 3.850168260872128e-05, - "loss": 0.7424, - "step": 78040 - }, - { - "epoch": 0.6899874467370357, - "grad_norm": 3.835240602493286, - "learning_rate": 3.850020922104941e-05, - "loss": 0.835, - "step": 78050 - }, - { - "epoch": 0.690075849997348, - "grad_norm": 4.1147565841674805, - "learning_rate": 3.8498735833377536e-05, - "loss": 0.6442, - "step": 78060 - }, - { - "epoch": 0.6901642532576602, - "grad_norm": 1.1382116079330444, - "learning_rate": 3.8497262445705665e-05, - "loss": 0.6578, - "step": 78070 - }, - { - "epoch": 0.6902526565179724, - "grad_norm": 1.4677952527999878, - "learning_rate": 3.849578905803379e-05, - "loss": 0.6354, - "step": 78080 - }, - { - "epoch": 0.6903410597782846, - "grad_norm": 2.0075416564941406, - "learning_rate": 3.849431567036192e-05, - "loss": 0.6277, - "step": 78090 - }, - { - "epoch": 0.6904294630385969, - "grad_norm": 2.518988609313965, - "learning_rate": 3.8492842282690057e-05, - "loss": 0.8074, - "step": 78100 - }, - { - "epoch": 0.6905178662989091, - "grad_norm": 1.1049258708953857, - "learning_rate": 3.8491368895018185e-05, - "loss": 0.6466, - "step": 78110 - }, - { - "epoch": 0.6906062695592213, - "grad_norm": 1.5145949125289917, - "learning_rate": 3.848989550734631e-05, - "loss": 0.6499, - "step": 78120 - }, - { - "epoch": 0.6906946728195336, - "grad_norm": 3.385376214981079, - "learning_rate": 3.848842211967444e-05, - "loss": 0.6591, - "step": 78130 - }, - { - "epoch": 0.6907830760798458, - "grad_norm": 12.704715728759766, - "learning_rate": 3.848694873200257e-05, - "loss": 0.6504, - "step": 78140 - }, - { - "epoch": 0.690871479340158, - "grad_norm": 1.2976224422454834, - "learning_rate": 3.84854753443307e-05, - "loss": 0.6289, - "step": 78150 - }, - { - "epoch": 0.6909598826004703, - "grad_norm": 1.834031581878662, - "learning_rate": 3.8484001956658833e-05, - "loss": 0.7837, - "step": 78160 - }, - { - "epoch": 0.6910482858607826, - "grad_norm": 3.1878788471221924, - "learning_rate": 3.848252856898696e-05, - "loss": 0.6847, - "step": 78170 - }, - { - "epoch": 0.6911366891210948, - "grad_norm": 4.563849449157715, - "learning_rate": 3.848105518131509e-05, - "loss": 0.7368, - "step": 78180 - }, - { - "epoch": 0.6912250923814071, - "grad_norm": 5.8437676429748535, - "learning_rate": 3.847958179364322e-05, - "loss": 0.6023, - "step": 78190 - }, - { - "epoch": 0.6913134956417193, - "grad_norm": 1.4736589193344116, - "learning_rate": 3.847810840597135e-05, - "loss": 0.7275, - "step": 78200 - }, - { - "epoch": 0.6914018989020315, - "grad_norm": 1.7320752143859863, - "learning_rate": 3.8476635018299475e-05, - "loss": 0.7797, - "step": 78210 - }, - { - "epoch": 0.6914903021623438, - "grad_norm": 2.283383369445801, - "learning_rate": 3.8475161630627604e-05, - "loss": 0.74, - "step": 78220 - }, - { - "epoch": 0.691578705422656, - "grad_norm": 1.9529945850372314, - "learning_rate": 3.847368824295574e-05, - "loss": 0.6701, - "step": 78230 - }, - { - "epoch": 0.6916671086829682, - "grad_norm": 1.152864694595337, - "learning_rate": 3.847221485528387e-05, - "loss": 0.7087, - "step": 78240 - }, - { - "epoch": 0.6917555119432804, - "grad_norm": 6.683051586151123, - "learning_rate": 3.8470741467611995e-05, - "loss": 0.5918, - "step": 78250 - }, - { - "epoch": 0.6918439152035927, - "grad_norm": 2.7650420665740967, - "learning_rate": 3.8469268079940124e-05, - "loss": 0.6306, - "step": 78260 - }, - { - "epoch": 0.6919323184639049, - "grad_norm": 3.6181538105010986, - "learning_rate": 3.846779469226825e-05, - "loss": 0.7452, - "step": 78270 - }, - { - "epoch": 0.6920207217242171, - "grad_norm": 8.790289878845215, - "learning_rate": 3.846632130459638e-05, - "loss": 0.702, - "step": 78280 - }, - { - "epoch": 0.6921091249845295, - "grad_norm": 7.053476810455322, - "learning_rate": 3.8464847916924516e-05, - "loss": 0.7591, - "step": 78290 - }, - { - "epoch": 0.6921975282448417, - "grad_norm": 4.7699785232543945, - "learning_rate": 3.846337452925264e-05, - "loss": 0.7871, - "step": 78300 - }, - { - "epoch": 0.6922859315051539, - "grad_norm": 2.2361674308776855, - "learning_rate": 3.846190114158077e-05, - "loss": 0.7456, - "step": 78310 - }, - { - "epoch": 0.6923743347654662, - "grad_norm": 4.297597408294678, - "learning_rate": 3.84604277539089e-05, - "loss": 0.6889, - "step": 78320 - }, - { - "epoch": 0.6924627380257784, - "grad_norm": 4.675218105316162, - "learning_rate": 3.845895436623703e-05, - "loss": 0.6225, - "step": 78330 - }, - { - "epoch": 0.6925511412860906, - "grad_norm": 1.6113498210906982, - "learning_rate": 3.845748097856516e-05, - "loss": 0.5882, - "step": 78340 - }, - { - "epoch": 0.6926395445464029, - "grad_norm": 5.541153430938721, - "learning_rate": 3.845600759089329e-05, - "loss": 0.565, - "step": 78350 - }, - { - "epoch": 0.6927279478067151, - "grad_norm": 1.596197247505188, - "learning_rate": 3.8454534203221414e-05, - "loss": 0.6401, - "step": 78360 - }, - { - "epoch": 0.6928163510670273, - "grad_norm": 4.908422946929932, - "learning_rate": 3.845306081554955e-05, - "loss": 0.7363, - "step": 78370 - }, - { - "epoch": 0.6929047543273396, - "grad_norm": 5.7742486000061035, - "learning_rate": 3.845158742787768e-05, - "loss": 0.5931, - "step": 78380 - }, - { - "epoch": 0.6929931575876518, - "grad_norm": 3.7057316303253174, - "learning_rate": 3.8450114040205806e-05, - "loss": 0.6672, - "step": 78390 - }, - { - "epoch": 0.693081560847964, - "grad_norm": 2.952052593231201, - "learning_rate": 3.8448640652533934e-05, - "loss": 0.6726, - "step": 78400 - }, - { - "epoch": 0.6931699641082764, - "grad_norm": 5.1991400718688965, - "learning_rate": 3.844716726486206e-05, - "loss": 0.5413, - "step": 78410 - }, - { - "epoch": 0.6932583673685886, - "grad_norm": 3.4608805179595947, - "learning_rate": 3.844569387719019e-05, - "loss": 0.6158, - "step": 78420 - }, - { - "epoch": 0.6933467706289008, - "grad_norm": 3.589613437652588, - "learning_rate": 3.8444220489518326e-05, - "loss": 0.7783, - "step": 78430 - }, - { - "epoch": 0.693435173889213, - "grad_norm": 11.545126914978027, - "learning_rate": 3.844274710184645e-05, - "loss": 0.6587, - "step": 78440 - }, - { - "epoch": 0.6935235771495253, - "grad_norm": 9.156929016113281, - "learning_rate": 3.844127371417458e-05, - "loss": 0.7596, - "step": 78450 - }, - { - "epoch": 0.6936119804098375, - "grad_norm": 3.0845587253570557, - "learning_rate": 3.843980032650271e-05, - "loss": 0.6724, - "step": 78460 - }, - { - "epoch": 0.6937003836701497, - "grad_norm": 1.9026954174041748, - "learning_rate": 3.843832693883084e-05, - "loss": 0.5653, - "step": 78470 - }, - { - "epoch": 0.693788786930462, - "grad_norm": 6.188068866729736, - "learning_rate": 3.843685355115897e-05, - "loss": 0.7205, - "step": 78480 - }, - { - "epoch": 0.6938771901907742, - "grad_norm": 1.2324304580688477, - "learning_rate": 3.84353801634871e-05, - "loss": 0.7364, - "step": 78490 - }, - { - "epoch": 0.6939655934510864, - "grad_norm": 1.2918884754180908, - "learning_rate": 3.8433906775815225e-05, - "loss": 0.7628, - "step": 78500 - }, - { - "epoch": 0.6940539967113987, - "grad_norm": 3.9561684131622314, - "learning_rate": 3.843243338814336e-05, - "loss": 0.6541, - "step": 78510 - }, - { - "epoch": 0.694142399971711, - "grad_norm": 5.111685276031494, - "learning_rate": 3.843096000047148e-05, - "loss": 0.6263, - "step": 78520 - }, - { - "epoch": 0.6942308032320232, - "grad_norm": 2.7367007732391357, - "learning_rate": 3.8429486612799616e-05, - "loss": 0.5999, - "step": 78530 - }, - { - "epoch": 0.6943192064923355, - "grad_norm": 11.13370418548584, - "learning_rate": 3.8428013225127745e-05, - "loss": 0.7875, - "step": 78540 - }, - { - "epoch": 0.6944076097526477, - "grad_norm": 2.065875768661499, - "learning_rate": 3.842653983745587e-05, - "loss": 0.743, - "step": 78550 - }, - { - "epoch": 0.6944960130129599, - "grad_norm": 1.517561912536621, - "learning_rate": 3.8425066449784e-05, - "loss": 0.5215, - "step": 78560 - }, - { - "epoch": 0.6945844162732722, - "grad_norm": 9.550228118896484, - "learning_rate": 3.842359306211214e-05, - "loss": 0.6063, - "step": 78570 - }, - { - "epoch": 0.6946728195335844, - "grad_norm": 3.929105758666992, - "learning_rate": 3.842211967444026e-05, - "loss": 0.608, - "step": 78580 - }, - { - "epoch": 0.6947612227938966, - "grad_norm": 4.663963794708252, - "learning_rate": 3.842064628676839e-05, - "loss": 0.6964, - "step": 78590 - }, - { - "epoch": 0.6948496260542089, - "grad_norm": 23.641374588012695, - "learning_rate": 3.841917289909652e-05, - "loss": 0.6725, - "step": 78600 - }, - { - "epoch": 0.6949380293145211, - "grad_norm": 5.547507286071777, - "learning_rate": 3.841769951142465e-05, - "loss": 0.6349, - "step": 78610 - }, - { - "epoch": 0.6950264325748333, - "grad_norm": 5.105334281921387, - "learning_rate": 3.841622612375278e-05, - "loss": 0.5902, - "step": 78620 - }, - { - "epoch": 0.6951148358351456, - "grad_norm": 3.687915563583374, - "learning_rate": 3.8414752736080914e-05, - "loss": 0.8202, - "step": 78630 - }, - { - "epoch": 0.6952032390954579, - "grad_norm": 4.429482460021973, - "learning_rate": 3.8413279348409035e-05, - "loss": 0.6844, - "step": 78640 - }, - { - "epoch": 0.6952916423557701, - "grad_norm": 5.250709056854248, - "learning_rate": 3.841180596073717e-05, - "loss": 0.7061, - "step": 78650 - }, - { - "epoch": 0.6953800456160824, - "grad_norm": 2.9156718254089355, - "learning_rate": 3.841033257306529e-05, - "loss": 0.7087, - "step": 78660 - }, - { - "epoch": 0.6954684488763946, - "grad_norm": 3.9343905448913574, - "learning_rate": 3.840885918539343e-05, - "loss": 0.6798, - "step": 78670 - }, - { - "epoch": 0.6955568521367068, - "grad_norm": 2.015077829360962, - "learning_rate": 3.8407385797721555e-05, - "loss": 0.6106, - "step": 78680 - }, - { - "epoch": 0.695645255397019, - "grad_norm": 6.2138566970825195, - "learning_rate": 3.8405912410049684e-05, - "loss": 0.6293, - "step": 78690 - }, - { - "epoch": 0.6957336586573313, - "grad_norm": 17.735597610473633, - "learning_rate": 3.840443902237781e-05, - "loss": 0.7272, - "step": 78700 - }, - { - "epoch": 0.6958220619176435, - "grad_norm": 3.783289909362793, - "learning_rate": 3.840296563470595e-05, - "loss": 0.7488, - "step": 78710 - }, - { - "epoch": 0.6959104651779557, - "grad_norm": 4.88430118560791, - "learning_rate": 3.840149224703407e-05, - "loss": 0.7339, - "step": 78720 - }, - { - "epoch": 0.695998868438268, - "grad_norm": 15.371222496032715, - "learning_rate": 3.8400018859362204e-05, - "loss": 0.6698, - "step": 78730 - }, - { - "epoch": 0.6960872716985802, - "grad_norm": 2.1152405738830566, - "learning_rate": 3.839854547169033e-05, - "loss": 0.6102, - "step": 78740 - }, - { - "epoch": 0.6961756749588924, - "grad_norm": 2.173186779022217, - "learning_rate": 3.839707208401846e-05, - "loss": 0.6526, - "step": 78750 - }, - { - "epoch": 0.6962640782192048, - "grad_norm": 2.6727705001831055, - "learning_rate": 3.839559869634659e-05, - "loss": 0.7233, - "step": 78760 - }, - { - "epoch": 0.696352481479517, - "grad_norm": 7.480678081512451, - "learning_rate": 3.839412530867472e-05, - "loss": 0.6809, - "step": 78770 - }, - { - "epoch": 0.6964408847398292, - "grad_norm": 1.2236015796661377, - "learning_rate": 3.8392651921002846e-05, - "loss": 0.6014, - "step": 78780 - }, - { - "epoch": 0.6965292880001415, - "grad_norm": 1.2494572401046753, - "learning_rate": 3.839117853333098e-05, - "loss": 0.6668, - "step": 78790 - }, - { - "epoch": 0.6966176912604537, - "grad_norm": 8.251554489135742, - "learning_rate": 3.83897051456591e-05, - "loss": 0.6042, - "step": 78800 - }, - { - "epoch": 0.6967060945207659, - "grad_norm": 2.8776397705078125, - "learning_rate": 3.838823175798724e-05, - "loss": 0.752, - "step": 78810 - }, - { - "epoch": 0.6967944977810782, - "grad_norm": 1.4898128509521484, - "learning_rate": 3.8386758370315366e-05, - "loss": 0.5884, - "step": 78820 - }, - { - "epoch": 0.6968829010413904, - "grad_norm": 2.1654345989227295, - "learning_rate": 3.8385284982643494e-05, - "loss": 0.6226, - "step": 78830 - }, - { - "epoch": 0.6969713043017026, - "grad_norm": 3.406527519226074, - "learning_rate": 3.838381159497162e-05, - "loss": 0.6076, - "step": 78840 - }, - { - "epoch": 0.6970597075620149, - "grad_norm": 9.295869827270508, - "learning_rate": 3.838233820729976e-05, - "loss": 0.5279, - "step": 78850 - }, - { - "epoch": 0.6971481108223271, - "grad_norm": 3.8971216678619385, - "learning_rate": 3.838086481962788e-05, - "loss": 0.5792, - "step": 78860 - }, - { - "epoch": 0.6972365140826393, - "grad_norm": 3.4754998683929443, - "learning_rate": 3.8379391431956014e-05, - "loss": 0.7319, - "step": 78870 - }, - { - "epoch": 0.6973249173429517, - "grad_norm": 3.010288953781128, - "learning_rate": 3.8377918044284136e-05, - "loss": 0.6882, - "step": 78880 - }, - { - "epoch": 0.6974133206032639, - "grad_norm": 2.7883942127227783, - "learning_rate": 3.837644465661227e-05, - "loss": 0.6509, - "step": 78890 - }, - { - "epoch": 0.6975017238635761, - "grad_norm": 2.550469160079956, - "learning_rate": 3.83749712689404e-05, - "loss": 0.6652, - "step": 78900 - }, - { - "epoch": 0.6975901271238883, - "grad_norm": 1.3190661668777466, - "learning_rate": 3.837349788126853e-05, - "loss": 0.5776, - "step": 78910 - }, - { - "epoch": 0.6976785303842006, - "grad_norm": 2.149895668029785, - "learning_rate": 3.8372024493596656e-05, - "loss": 0.6269, - "step": 78920 - }, - { - "epoch": 0.6977669336445128, - "grad_norm": 2.7502822875976562, - "learning_rate": 3.837055110592479e-05, - "loss": 0.6426, - "step": 78930 - }, - { - "epoch": 0.697855336904825, - "grad_norm": 3.8479859828948975, - "learning_rate": 3.836907771825291e-05, - "loss": 0.7274, - "step": 78940 - }, - { - "epoch": 0.6979437401651373, - "grad_norm": 1.2449777126312256, - "learning_rate": 3.836760433058105e-05, - "loss": 0.7502, - "step": 78950 - }, - { - "epoch": 0.6980321434254495, - "grad_norm": 2.5838563442230225, - "learning_rate": 3.8366130942909176e-05, - "loss": 0.6146, - "step": 78960 - }, - { - "epoch": 0.6981205466857617, - "grad_norm": 1.2737388610839844, - "learning_rate": 3.8364657555237305e-05, - "loss": 0.7528, - "step": 78970 - }, - { - "epoch": 0.698208949946074, - "grad_norm": 2.6073715686798096, - "learning_rate": 3.836318416756543e-05, - "loss": 0.7847, - "step": 78980 - }, - { - "epoch": 0.6982973532063863, - "grad_norm": 3.2915992736816406, - "learning_rate": 3.836171077989356e-05, - "loss": 0.691, - "step": 78990 - }, - { - "epoch": 0.6983857564666985, - "grad_norm": 6.68314266204834, - "learning_rate": 3.836023739222169e-05, - "loss": 0.6805, - "step": 79000 - }, - { - "epoch": 0.6984741597270108, - "grad_norm": 2.470914125442505, - "learning_rate": 3.8358764004549825e-05, - "loss": 0.5996, - "step": 79010 - }, - { - "epoch": 0.698562562987323, - "grad_norm": 2.4620614051818848, - "learning_rate": 3.835729061687795e-05, - "loss": 0.6096, - "step": 79020 - }, - { - "epoch": 0.6986509662476352, - "grad_norm": 2.979200601577759, - "learning_rate": 3.835581722920608e-05, - "loss": 0.6128, - "step": 79030 - }, - { - "epoch": 0.6987393695079475, - "grad_norm": 1.9747117757797241, - "learning_rate": 3.835434384153421e-05, - "loss": 0.7464, - "step": 79040 - }, - { - "epoch": 0.6988277727682597, - "grad_norm": 1.1767827272415161, - "learning_rate": 3.835287045386234e-05, - "loss": 0.5912, - "step": 79050 - }, - { - "epoch": 0.6989161760285719, - "grad_norm": 1.072789192199707, - "learning_rate": 3.835139706619047e-05, - "loss": 0.6726, - "step": 79060 - }, - { - "epoch": 0.6990045792888842, - "grad_norm": 3.0794551372528076, - "learning_rate": 3.83499236785186e-05, - "loss": 0.6359, - "step": 79070 - }, - { - "epoch": 0.6990929825491964, - "grad_norm": 1.395095705986023, - "learning_rate": 3.834845029084673e-05, - "loss": 0.752, - "step": 79080 - }, - { - "epoch": 0.6991813858095086, - "grad_norm": 4.126559734344482, - "learning_rate": 3.834697690317486e-05, - "loss": 0.7603, - "step": 79090 - }, - { - "epoch": 0.6992697890698208, - "grad_norm": 5.317914962768555, - "learning_rate": 3.834550351550299e-05, - "loss": 0.5813, - "step": 79100 - }, - { - "epoch": 0.6993581923301332, - "grad_norm": 1.5926259756088257, - "learning_rate": 3.8344030127831115e-05, - "loss": 0.6125, - "step": 79110 - }, - { - "epoch": 0.6994465955904454, - "grad_norm": 1.7232309579849243, - "learning_rate": 3.8342556740159244e-05, - "loss": 0.5315, - "step": 79120 - }, - { - "epoch": 0.6995349988507576, - "grad_norm": 14.704235076904297, - "learning_rate": 3.834108335248737e-05, - "loss": 0.7286, - "step": 79130 - }, - { - "epoch": 0.6996234021110699, - "grad_norm": 1.7574563026428223, - "learning_rate": 3.833960996481551e-05, - "loss": 0.5799, - "step": 79140 - }, - { - "epoch": 0.6997118053713821, - "grad_norm": 1.215949296951294, - "learning_rate": 3.8338136577143635e-05, - "loss": 0.7365, - "step": 79150 - }, - { - "epoch": 0.6998002086316943, - "grad_norm": 5.2433881759643555, - "learning_rate": 3.8336663189471764e-05, - "loss": 0.6206, - "step": 79160 - }, - { - "epoch": 0.6998886118920066, - "grad_norm": 3.9595348834991455, - "learning_rate": 3.833518980179989e-05, - "loss": 0.6593, - "step": 79170 - }, - { - "epoch": 0.6999770151523188, - "grad_norm": 4.542568683624268, - "learning_rate": 3.833371641412802e-05, - "loss": 0.6442, - "step": 79180 - }, - { - "epoch": 0.700065418412631, - "grad_norm": 8.986078262329102, - "learning_rate": 3.833224302645615e-05, - "loss": 0.7199, - "step": 79190 - }, - { - "epoch": 0.7001538216729433, - "grad_norm": 4.744601249694824, - "learning_rate": 3.8330769638784284e-05, - "loss": 0.6518, - "step": 79200 - }, - { - "epoch": 0.7002422249332555, - "grad_norm": 1.6164606809616089, - "learning_rate": 3.832929625111241e-05, - "loss": 0.731, - "step": 79210 - }, - { - "epoch": 0.7003306281935677, - "grad_norm": 2.7258386611938477, - "learning_rate": 3.832782286344054e-05, - "loss": 0.6669, - "step": 79220 - }, - { - "epoch": 0.7004190314538801, - "grad_norm": 1.7344862222671509, - "learning_rate": 3.832634947576867e-05, - "loss": 0.5681, - "step": 79230 - }, - { - "epoch": 0.7005074347141923, - "grad_norm": 4.408559799194336, - "learning_rate": 3.83248760880968e-05, - "loss": 0.6261, - "step": 79240 - }, - { - "epoch": 0.7005958379745045, - "grad_norm": 1.9056577682495117, - "learning_rate": 3.8323402700424926e-05, - "loss": 0.6321, - "step": 79250 - }, - { - "epoch": 0.7006842412348168, - "grad_norm": 2.941056966781616, - "learning_rate": 3.832192931275306e-05, - "loss": 0.5622, - "step": 79260 - }, - { - "epoch": 0.700772644495129, - "grad_norm": 2.356008291244507, - "learning_rate": 3.832045592508118e-05, - "loss": 0.6902, - "step": 79270 - }, - { - "epoch": 0.7008610477554412, - "grad_norm": 4.120590686798096, - "learning_rate": 3.831898253740932e-05, - "loss": 0.7524, - "step": 79280 - }, - { - "epoch": 0.7009494510157535, - "grad_norm": 3.3727569580078125, - "learning_rate": 3.8317509149737446e-05, - "loss": 0.6958, - "step": 79290 - }, - { - "epoch": 0.7010378542760657, - "grad_norm": 7.907055854797363, - "learning_rate": 3.8316035762065574e-05, - "loss": 0.7343, - "step": 79300 - }, - { - "epoch": 0.7011262575363779, - "grad_norm": 1.464552640914917, - "learning_rate": 3.83145623743937e-05, - "loss": 0.6277, - "step": 79310 - }, - { - "epoch": 0.7012146607966901, - "grad_norm": 1.5897775888442993, - "learning_rate": 3.831308898672184e-05, - "loss": 0.6797, - "step": 79320 - }, - { - "epoch": 0.7013030640570024, - "grad_norm": 3.080288887023926, - "learning_rate": 3.831161559904996e-05, - "loss": 0.667, - "step": 79330 - }, - { - "epoch": 0.7013914673173146, - "grad_norm": 3.283644676208496, - "learning_rate": 3.8310142211378094e-05, - "loss": 0.6307, - "step": 79340 - }, - { - "epoch": 0.701479870577627, - "grad_norm": 1.221320390701294, - "learning_rate": 3.8308668823706216e-05, - "loss": 0.6109, - "step": 79350 - }, - { - "epoch": 0.7015682738379392, - "grad_norm": 3.7016940116882324, - "learning_rate": 3.830719543603435e-05, - "loss": 0.6676, - "step": 79360 - }, - { - "epoch": 0.7016566770982514, - "grad_norm": 4.4188432693481445, - "learning_rate": 3.830572204836248e-05, - "loss": 0.646, - "step": 79370 - }, - { - "epoch": 0.7017450803585636, - "grad_norm": 2.582963228225708, - "learning_rate": 3.830424866069061e-05, - "loss": 0.7603, - "step": 79380 - }, - { - "epoch": 0.7018334836188759, - "grad_norm": 38.37507629394531, - "learning_rate": 3.8302775273018736e-05, - "loss": 0.6606, - "step": 79390 - }, - { - "epoch": 0.7019218868791881, - "grad_norm": 1.8954352140426636, - "learning_rate": 3.830130188534687e-05, - "loss": 0.7488, - "step": 79400 - }, - { - "epoch": 0.7020102901395003, - "grad_norm": 4.248343467712402, - "learning_rate": 3.829982849767499e-05, - "loss": 0.6453, - "step": 79410 - }, - { - "epoch": 0.7020986933998126, - "grad_norm": 10.113481521606445, - "learning_rate": 3.829835511000313e-05, - "loss": 0.6714, - "step": 79420 - }, - { - "epoch": 0.7021870966601248, - "grad_norm": 3.6906447410583496, - "learning_rate": 3.8296881722331256e-05, - "loss": 0.7333, - "step": 79430 - }, - { - "epoch": 0.702275499920437, - "grad_norm": 7.084136962890625, - "learning_rate": 3.8295408334659385e-05, - "loss": 0.7472, - "step": 79440 - }, - { - "epoch": 0.7023639031807493, - "grad_norm": 7.299503326416016, - "learning_rate": 3.829393494698751e-05, - "loss": 0.7032, - "step": 79450 - }, - { - "epoch": 0.7024523064410615, - "grad_norm": 1.9515361785888672, - "learning_rate": 3.829246155931565e-05, - "loss": 0.7094, - "step": 79460 - }, - { - "epoch": 0.7025407097013738, - "grad_norm": 2.1417977809906006, - "learning_rate": 3.829098817164377e-05, - "loss": 0.5656, - "step": 79470 - }, - { - "epoch": 0.7026291129616861, - "grad_norm": 3.552403211593628, - "learning_rate": 3.8289514783971905e-05, - "loss": 0.6753, - "step": 79480 - }, - { - "epoch": 0.7027175162219983, - "grad_norm": 10.532079696655273, - "learning_rate": 3.8288041396300027e-05, - "loss": 0.7718, - "step": 79490 - }, - { - "epoch": 0.7028059194823105, - "grad_norm": 1.7979345321655273, - "learning_rate": 3.828656800862816e-05, - "loss": 0.6526, - "step": 79500 - }, - { - "epoch": 0.7028943227426228, - "grad_norm": 10.65814208984375, - "learning_rate": 3.828509462095629e-05, - "loss": 0.7484, - "step": 79510 - }, - { - "epoch": 0.702982726002935, - "grad_norm": 2.0553746223449707, - "learning_rate": 3.828362123328442e-05, - "loss": 0.6374, - "step": 79520 - }, - { - "epoch": 0.7030711292632472, - "grad_norm": 1.2106977701187134, - "learning_rate": 3.828214784561255e-05, - "loss": 0.688, - "step": 79530 - }, - { - "epoch": 0.7031595325235594, - "grad_norm": 2.429676055908203, - "learning_rate": 3.828067445794068e-05, - "loss": 0.7162, - "step": 79540 - }, - { - "epoch": 0.7032479357838717, - "grad_norm": 5.542989253997803, - "learning_rate": 3.8279201070268803e-05, - "loss": 0.709, - "step": 79550 - }, - { - "epoch": 0.7033363390441839, - "grad_norm": 4.563215732574463, - "learning_rate": 3.827772768259694e-05, - "loss": 0.64, - "step": 79560 - }, - { - "epoch": 0.7034247423044961, - "grad_norm": 3.350586414337158, - "learning_rate": 3.827625429492507e-05, - "loss": 0.7658, - "step": 79570 - }, - { - "epoch": 0.7035131455648085, - "grad_norm": 3.2068681716918945, - "learning_rate": 3.8274780907253195e-05, - "loss": 0.8937, - "step": 79580 - }, - { - "epoch": 0.7036015488251207, - "grad_norm": 2.844890832901001, - "learning_rate": 3.8273307519581324e-05, - "loss": 0.5628, - "step": 79590 - }, - { - "epoch": 0.703689952085433, - "grad_norm": 22.12971305847168, - "learning_rate": 3.827183413190945e-05, - "loss": 0.6472, - "step": 79600 - }, - { - "epoch": 0.7037783553457452, - "grad_norm": 3.8450522422790527, - "learning_rate": 3.827036074423758e-05, - "loss": 0.5785, - "step": 79610 - }, - { - "epoch": 0.7038667586060574, - "grad_norm": 1.7838504314422607, - "learning_rate": 3.8268887356565715e-05, - "loss": 0.688, - "step": 79620 - }, - { - "epoch": 0.7039551618663696, - "grad_norm": 6.339825630187988, - "learning_rate": 3.826741396889384e-05, - "loss": 0.7111, - "step": 79630 - }, - { - "epoch": 0.7040435651266819, - "grad_norm": 3.405885696411133, - "learning_rate": 3.826594058122197e-05, - "loss": 0.756, - "step": 79640 - }, - { - "epoch": 0.7041319683869941, - "grad_norm": 1.351367473602295, - "learning_rate": 3.82644671935501e-05, - "loss": 0.7055, - "step": 79650 - }, - { - "epoch": 0.7042203716473063, - "grad_norm": 2.725022554397583, - "learning_rate": 3.826299380587823e-05, - "loss": 0.7049, - "step": 79660 - }, - { - "epoch": 0.7043087749076186, - "grad_norm": 18.686199188232422, - "learning_rate": 3.826152041820636e-05, - "loss": 0.6502, - "step": 79670 - }, - { - "epoch": 0.7043971781679308, - "grad_norm": 2.0062692165374756, - "learning_rate": 3.826004703053449e-05, - "loss": 0.6214, - "step": 79680 - }, - { - "epoch": 0.704485581428243, - "grad_norm": 1.0387845039367676, - "learning_rate": 3.8258573642862614e-05, - "loss": 0.6143, - "step": 79690 - }, - { - "epoch": 0.7045739846885554, - "grad_norm": 10.7262601852417, - "learning_rate": 3.825710025519075e-05, - "loss": 0.6674, - "step": 79700 - }, - { - "epoch": 0.7046623879488676, - "grad_norm": 2.371840238571167, - "learning_rate": 3.825562686751887e-05, - "loss": 0.8116, - "step": 79710 - }, - { - "epoch": 0.7047507912091798, - "grad_norm": 5.077537536621094, - "learning_rate": 3.8254153479847006e-05, - "loss": 0.651, - "step": 79720 - }, - { - "epoch": 0.7048391944694921, - "grad_norm": 4.237266540527344, - "learning_rate": 3.8252680092175134e-05, - "loss": 0.6247, - "step": 79730 - }, - { - "epoch": 0.7049275977298043, - "grad_norm": 10.335536003112793, - "learning_rate": 3.825120670450326e-05, - "loss": 0.6308, - "step": 79740 - }, - { - "epoch": 0.7050160009901165, - "grad_norm": 4.275367259979248, - "learning_rate": 3.824973331683139e-05, - "loss": 0.7129, - "step": 79750 - }, - { - "epoch": 0.7051044042504288, - "grad_norm": 2.2424392700195312, - "learning_rate": 3.8248259929159526e-05, - "loss": 0.5872, - "step": 79760 - }, - { - "epoch": 0.705192807510741, - "grad_norm": 7.280856132507324, - "learning_rate": 3.824678654148765e-05, - "loss": 0.5647, - "step": 79770 - }, - { - "epoch": 0.7052812107710532, - "grad_norm": 17.15814971923828, - "learning_rate": 3.824531315381578e-05, - "loss": 0.7838, - "step": 79780 - }, - { - "epoch": 0.7053696140313654, - "grad_norm": 1.6910101175308228, - "learning_rate": 3.824383976614391e-05, - "loss": 0.5949, - "step": 79790 - }, - { - "epoch": 0.7054580172916777, - "grad_norm": 5.263885021209717, - "learning_rate": 3.824236637847204e-05, - "loss": 0.6636, - "step": 79800 - }, - { - "epoch": 0.7055464205519899, - "grad_norm": 6.243316650390625, - "learning_rate": 3.824089299080017e-05, - "loss": 0.6419, - "step": 79810 - }, - { - "epoch": 0.7056348238123022, - "grad_norm": 3.959357976913452, - "learning_rate": 3.8239419603128296e-05, - "loss": 0.6747, - "step": 79820 - }, - { - "epoch": 0.7057232270726145, - "grad_norm": 5.906799793243408, - "learning_rate": 3.8237946215456424e-05, - "loss": 0.647, - "step": 79830 - }, - { - "epoch": 0.7058116303329267, - "grad_norm": 7.207847595214844, - "learning_rate": 3.823647282778456e-05, - "loss": 0.7708, - "step": 79840 - }, - { - "epoch": 0.7059000335932389, - "grad_norm": 4.120061874389648, - "learning_rate": 3.823499944011268e-05, - "loss": 0.6943, - "step": 79850 - }, - { - "epoch": 0.7059884368535512, - "grad_norm": 3.261446237564087, - "learning_rate": 3.8233526052440816e-05, - "loss": 0.6548, - "step": 79860 - }, - { - "epoch": 0.7060768401138634, - "grad_norm": 5.274998664855957, - "learning_rate": 3.8232052664768945e-05, - "loss": 0.555, - "step": 79870 - }, - { - "epoch": 0.7061652433741756, - "grad_norm": 13.627988815307617, - "learning_rate": 3.823057927709707e-05, - "loss": 0.5523, - "step": 79880 - }, - { - "epoch": 0.7062536466344879, - "grad_norm": 5.227761745452881, - "learning_rate": 3.82291058894252e-05, - "loss": 0.7526, - "step": 79890 - }, - { - "epoch": 0.7063420498948001, - "grad_norm": 7.97208309173584, - "learning_rate": 3.8227632501753336e-05, - "loss": 0.688, - "step": 79900 - }, - { - "epoch": 0.7064304531551123, - "grad_norm": 2.218047618865967, - "learning_rate": 3.822615911408146e-05, - "loss": 0.6018, - "step": 79910 - }, - { - "epoch": 0.7065188564154246, - "grad_norm": 4.144509792327881, - "learning_rate": 3.822468572640959e-05, - "loss": 0.8467, - "step": 79920 - }, - { - "epoch": 0.7066072596757368, - "grad_norm": 2.490694284439087, - "learning_rate": 3.822321233873772e-05, - "loss": 0.5365, - "step": 79930 - }, - { - "epoch": 0.7066956629360491, - "grad_norm": 2.9271297454833984, - "learning_rate": 3.822173895106585e-05, - "loss": 0.6453, - "step": 79940 - }, - { - "epoch": 0.7067840661963614, - "grad_norm": 6.936543941497803, - "learning_rate": 3.822026556339398e-05, - "loss": 0.6204, - "step": 79950 - }, - { - "epoch": 0.7068724694566736, - "grad_norm": 1.7527416944503784, - "learning_rate": 3.8218792175722107e-05, - "loss": 0.5657, - "step": 79960 - }, - { - "epoch": 0.7069608727169858, - "grad_norm": 5.678459167480469, - "learning_rate": 3.8217318788050235e-05, - "loss": 0.6058, - "step": 79970 - }, - { - "epoch": 0.707049275977298, - "grad_norm": 1.9363157749176025, - "learning_rate": 3.821584540037837e-05, - "loss": 0.6623, - "step": 79980 - }, - { - "epoch": 0.7071376792376103, - "grad_norm": 9.029206275939941, - "learning_rate": 3.82143720127065e-05, - "loss": 0.6865, - "step": 79990 - }, - { - "epoch": 0.7072260824979225, - "grad_norm": 1.8255505561828613, - "learning_rate": 3.821289862503463e-05, - "loss": 0.6503, - "step": 80000 - }, - { - "epoch": 0.7073144857582347, - "grad_norm": 4.427699565887451, - "learning_rate": 3.8211425237362755e-05, - "loss": 0.7285, - "step": 80010 - }, - { - "epoch": 0.707402889018547, - "grad_norm": 3.238001585006714, - "learning_rate": 3.8209951849690883e-05, - "loss": 0.7121, - "step": 80020 - }, - { - "epoch": 0.7074912922788592, - "grad_norm": 2.349195718765259, - "learning_rate": 3.820847846201901e-05, - "loss": 0.8094, - "step": 80030 - }, - { - "epoch": 0.7075796955391714, - "grad_norm": 8.461559295654297, - "learning_rate": 3.820700507434715e-05, - "loss": 0.8172, - "step": 80040 - }, - { - "epoch": 0.7076680987994838, - "grad_norm": 2.5984296798706055, - "learning_rate": 3.8205531686675275e-05, - "loss": 0.6721, - "step": 80050 - }, - { - "epoch": 0.707756502059796, - "grad_norm": 3.524854898452759, - "learning_rate": 3.8204058299003404e-05, - "loss": 0.5735, - "step": 80060 - }, - { - "epoch": 0.7078449053201082, - "grad_norm": 1.5843122005462646, - "learning_rate": 3.820258491133153e-05, - "loss": 0.7668, - "step": 80070 - }, - { - "epoch": 0.7079333085804205, - "grad_norm": 1.318343162536621, - "learning_rate": 3.820111152365966e-05, - "loss": 0.6858, - "step": 80080 - }, - { - "epoch": 0.7080217118407327, - "grad_norm": 1.6608740091323853, - "learning_rate": 3.819963813598779e-05, - "loss": 0.516, - "step": 80090 - }, - { - "epoch": 0.7081101151010449, - "grad_norm": 3.6183922290802, - "learning_rate": 3.819816474831592e-05, - "loss": 0.6425, - "step": 80100 - }, - { - "epoch": 0.7081985183613572, - "grad_norm": 8.370306015014648, - "learning_rate": 3.819669136064405e-05, - "loss": 0.8753, - "step": 80110 - }, - { - "epoch": 0.7082869216216694, - "grad_norm": 7.38960075378418, - "learning_rate": 3.819521797297218e-05, - "loss": 0.589, - "step": 80120 - }, - { - "epoch": 0.7083753248819816, - "grad_norm": 1.351130723953247, - "learning_rate": 3.819374458530031e-05, - "loss": 0.6062, - "step": 80130 - }, - { - "epoch": 0.7084637281422939, - "grad_norm": 4.630609512329102, - "learning_rate": 3.819227119762844e-05, - "loss": 0.6723, - "step": 80140 - }, - { - "epoch": 0.7085521314026061, - "grad_norm": 4.845788955688477, - "learning_rate": 3.8190797809956566e-05, - "loss": 0.6207, - "step": 80150 - }, - { - "epoch": 0.7086405346629183, - "grad_norm": 4.605804920196533, - "learning_rate": 3.8189324422284694e-05, - "loss": 0.7058, - "step": 80160 - }, - { - "epoch": 0.7087289379232307, - "grad_norm": 1.0693213939666748, - "learning_rate": 3.818785103461283e-05, - "loss": 0.6121, - "step": 80170 - }, - { - "epoch": 0.7088173411835429, - "grad_norm": 3.252079725265503, - "learning_rate": 3.818637764694095e-05, - "loss": 0.7348, - "step": 80180 - }, - { - "epoch": 0.7089057444438551, - "grad_norm": 3.078933000564575, - "learning_rate": 3.8184904259269086e-05, - "loss": 0.7634, - "step": 80190 - }, - { - "epoch": 0.7089941477041674, - "grad_norm": 2.09808349609375, - "learning_rate": 3.8183430871597214e-05, - "loss": 0.6455, - "step": 80200 - }, - { - "epoch": 0.7090825509644796, - "grad_norm": 8.699748992919922, - "learning_rate": 3.818195748392534e-05, - "loss": 0.7876, - "step": 80210 - }, - { - "epoch": 0.7091709542247918, - "grad_norm": 6.0032734870910645, - "learning_rate": 3.818048409625347e-05, - "loss": 0.6613, - "step": 80220 - }, - { - "epoch": 0.709259357485104, - "grad_norm": 7.0532002449035645, - "learning_rate": 3.8179010708581606e-05, - "loss": 0.625, - "step": 80230 - }, - { - "epoch": 0.7093477607454163, - "grad_norm": 3.107225179672241, - "learning_rate": 3.817753732090973e-05, - "loss": 0.7048, - "step": 80240 - }, - { - "epoch": 0.7094361640057285, - "grad_norm": 4.6212873458862305, - "learning_rate": 3.817606393323786e-05, - "loss": 0.7531, - "step": 80250 - }, - { - "epoch": 0.7095245672660407, - "grad_norm": 2.4628825187683105, - "learning_rate": 3.817459054556599e-05, - "loss": 0.7099, - "step": 80260 - }, - { - "epoch": 0.709612970526353, - "grad_norm": 14.950610160827637, - "learning_rate": 3.817311715789412e-05, - "loss": 0.7147, - "step": 80270 - }, - { - "epoch": 0.7097013737866652, - "grad_norm": 1.1862995624542236, - "learning_rate": 3.817164377022225e-05, - "loss": 0.5203, - "step": 80280 - }, - { - "epoch": 0.7097897770469775, - "grad_norm": 1.3501532077789307, - "learning_rate": 3.8170170382550376e-05, - "loss": 0.7251, - "step": 80290 - }, - { - "epoch": 0.7098781803072898, - "grad_norm": 3.677821159362793, - "learning_rate": 3.8168696994878505e-05, - "loss": 0.4929, - "step": 80300 - }, - { - "epoch": 0.709966583567602, - "grad_norm": 1.737152099609375, - "learning_rate": 3.816722360720664e-05, - "loss": 0.7541, - "step": 80310 - }, - { - "epoch": 0.7100549868279142, - "grad_norm": 1.7136890888214111, - "learning_rate": 3.816575021953476e-05, - "loss": 0.6292, - "step": 80320 - }, - { - "epoch": 0.7101433900882265, - "grad_norm": 6.275796890258789, - "learning_rate": 3.8164276831862896e-05, - "loss": 0.5734, - "step": 80330 - }, - { - "epoch": 0.7102317933485387, - "grad_norm": 5.169858455657959, - "learning_rate": 3.8162803444191025e-05, - "loss": 0.7539, - "step": 80340 - }, - { - "epoch": 0.7103201966088509, - "grad_norm": 3.0072147846221924, - "learning_rate": 3.816133005651915e-05, - "loss": 0.6728, - "step": 80350 - }, - { - "epoch": 0.7104085998691632, - "grad_norm": 2.848865270614624, - "learning_rate": 3.815985666884728e-05, - "loss": 0.6994, - "step": 80360 - }, - { - "epoch": 0.7104970031294754, - "grad_norm": 1.6073395013809204, - "learning_rate": 3.8158383281175417e-05, - "loss": 0.5958, - "step": 80370 - }, - { - "epoch": 0.7105854063897876, - "grad_norm": 5.001084804534912, - "learning_rate": 3.815690989350354e-05, - "loss": 0.6145, - "step": 80380 - }, - { - "epoch": 0.7106738096500999, - "grad_norm": 3.050320863723755, - "learning_rate": 3.815543650583167e-05, - "loss": 0.627, - "step": 80390 - }, - { - "epoch": 0.7107622129104121, - "grad_norm": 6.285628318786621, - "learning_rate": 3.81539631181598e-05, - "loss": 0.711, - "step": 80400 - }, - { - "epoch": 0.7108506161707244, - "grad_norm": 5.953503131866455, - "learning_rate": 3.815248973048793e-05, - "loss": 0.6785, - "step": 80410 - }, - { - "epoch": 0.7109390194310367, - "grad_norm": 7.071252346038818, - "learning_rate": 3.815101634281606e-05, - "loss": 0.7448, - "step": 80420 - }, - { - "epoch": 0.7110274226913489, - "grad_norm": 5.907627582550049, - "learning_rate": 3.814954295514419e-05, - "loss": 0.639, - "step": 80430 - }, - { - "epoch": 0.7111158259516611, - "grad_norm": 2.3476157188415527, - "learning_rate": 3.8148069567472315e-05, - "loss": 0.7414, - "step": 80440 - }, - { - "epoch": 0.7112042292119733, - "grad_norm": 6.545152187347412, - "learning_rate": 3.814659617980045e-05, - "loss": 0.7091, - "step": 80450 - }, - { - "epoch": 0.7112926324722856, - "grad_norm": 3.210423469543457, - "learning_rate": 3.814512279212857e-05, - "loss": 0.794, - "step": 80460 - }, - { - "epoch": 0.7113810357325978, - "grad_norm": 3.8754019737243652, - "learning_rate": 3.814364940445671e-05, - "loss": 0.5328, - "step": 80470 - }, - { - "epoch": 0.71146943899291, - "grad_norm": 3.9626824855804443, - "learning_rate": 3.8142176016784835e-05, - "loss": 0.5803, - "step": 80480 - }, - { - "epoch": 0.7115578422532223, - "grad_norm": 5.242998123168945, - "learning_rate": 3.8140702629112964e-05, - "loss": 0.6155, - "step": 80490 - }, - { - "epoch": 0.7116462455135345, - "grad_norm": 5.1946120262146, - "learning_rate": 3.813922924144109e-05, - "loss": 0.7128, - "step": 80500 - }, - { - "epoch": 0.7117346487738467, - "grad_norm": 5.955926895141602, - "learning_rate": 3.813775585376923e-05, - "loss": 0.6745, - "step": 80510 - }, - { - "epoch": 0.711823052034159, - "grad_norm": 3.4583559036254883, - "learning_rate": 3.813628246609735e-05, - "loss": 0.8521, - "step": 80520 - }, - { - "epoch": 0.7119114552944713, - "grad_norm": 3.571674346923828, - "learning_rate": 3.8134809078425484e-05, - "loss": 0.6185, - "step": 80530 - }, - { - "epoch": 0.7119998585547835, - "grad_norm": 1.334622859954834, - "learning_rate": 3.8133335690753605e-05, - "loss": 0.5735, - "step": 80540 - }, - { - "epoch": 0.7120882618150958, - "grad_norm": 0.7861053943634033, - "learning_rate": 3.813186230308174e-05, - "loss": 0.4735, - "step": 80550 - }, - { - "epoch": 0.712176665075408, - "grad_norm": 3.4673657417297363, - "learning_rate": 3.813038891540987e-05, - "loss": 0.7406, - "step": 80560 - }, - { - "epoch": 0.7122650683357202, - "grad_norm": 4.715671539306641, - "learning_rate": 3.8128915527738e-05, - "loss": 0.6631, - "step": 80570 - }, - { - "epoch": 0.7123534715960325, - "grad_norm": 5.260922908782959, - "learning_rate": 3.8127442140066126e-05, - "loss": 0.5585, - "step": 80580 - }, - { - "epoch": 0.7124418748563447, - "grad_norm": 8.380212783813477, - "learning_rate": 3.812596875239426e-05, - "loss": 0.7258, - "step": 80590 - }, - { - "epoch": 0.7125302781166569, - "grad_norm": 3.363799810409546, - "learning_rate": 3.812449536472238e-05, - "loss": 0.6766, - "step": 80600 - }, - { - "epoch": 0.7126186813769692, - "grad_norm": 2.8767929077148438, - "learning_rate": 3.812302197705052e-05, - "loss": 0.7858, - "step": 80610 - }, - { - "epoch": 0.7127070846372814, - "grad_norm": 2.8520190715789795, - "learning_rate": 3.8121548589378646e-05, - "loss": 0.6691, - "step": 80620 - }, - { - "epoch": 0.7127954878975936, - "grad_norm": 3.7284460067749023, - "learning_rate": 3.8120075201706774e-05, - "loss": 0.5819, - "step": 80630 - }, - { - "epoch": 0.712883891157906, - "grad_norm": 4.8409528732299805, - "learning_rate": 3.81186018140349e-05, - "loss": 0.6469, - "step": 80640 - }, - { - "epoch": 0.7129722944182182, - "grad_norm": 7.845481872558594, - "learning_rate": 3.811712842636303e-05, - "loss": 0.7873, - "step": 80650 - }, - { - "epoch": 0.7130606976785304, - "grad_norm": 2.3964896202087402, - "learning_rate": 3.811565503869116e-05, - "loss": 0.8076, - "step": 80660 - }, - { - "epoch": 0.7131491009388427, - "grad_norm": 4.14758825302124, - "learning_rate": 3.8114181651019294e-05, - "loss": 0.6822, - "step": 80670 - }, - { - "epoch": 0.7132375041991549, - "grad_norm": 1.3765736818313599, - "learning_rate": 3.8112708263347416e-05, - "loss": 0.5771, - "step": 80680 - }, - { - "epoch": 0.7133259074594671, - "grad_norm": 3.637725830078125, - "learning_rate": 3.811123487567555e-05, - "loss": 0.6483, - "step": 80690 - }, - { - "epoch": 0.7134143107197793, - "grad_norm": 1.5444157123565674, - "learning_rate": 3.810976148800368e-05, - "loss": 0.7043, - "step": 80700 - }, - { - "epoch": 0.7135027139800916, - "grad_norm": 1.9112391471862793, - "learning_rate": 3.810828810033181e-05, - "loss": 0.6185, - "step": 80710 - }, - { - "epoch": 0.7135911172404038, - "grad_norm": 3.378286361694336, - "learning_rate": 3.8106814712659936e-05, - "loss": 0.6345, - "step": 80720 - }, - { - "epoch": 0.713679520500716, - "grad_norm": 7.995019912719727, - "learning_rate": 3.810534132498807e-05, - "loss": 0.5848, - "step": 80730 - }, - { - "epoch": 0.7137679237610283, - "grad_norm": 3.080310583114624, - "learning_rate": 3.810386793731619e-05, - "loss": 0.6683, - "step": 80740 - }, - { - "epoch": 0.7138563270213405, - "grad_norm": 2.683173418045044, - "learning_rate": 3.810239454964433e-05, - "loss": 0.6971, - "step": 80750 - }, - { - "epoch": 0.7139447302816528, - "grad_norm": 5.536104679107666, - "learning_rate": 3.810092116197245e-05, - "loss": 0.6663, - "step": 80760 - }, - { - "epoch": 0.7140331335419651, - "grad_norm": 4.201199054718018, - "learning_rate": 3.8099447774300585e-05, - "loss": 0.6005, - "step": 80770 - }, - { - "epoch": 0.7141215368022773, - "grad_norm": 10.316539764404297, - "learning_rate": 3.809797438662871e-05, - "loss": 0.7354, - "step": 80780 - }, - { - "epoch": 0.7142099400625895, - "grad_norm": 3.480752468109131, - "learning_rate": 3.809650099895684e-05, - "loss": 0.758, - "step": 80790 - }, - { - "epoch": 0.7142983433229018, - "grad_norm": 6.061373233795166, - "learning_rate": 3.809502761128497e-05, - "loss": 0.6605, - "step": 80800 - }, - { - "epoch": 0.714386746583214, - "grad_norm": 2.2674710750579834, - "learning_rate": 3.8093554223613105e-05, - "loss": 0.6302, - "step": 80810 - }, - { - "epoch": 0.7144751498435262, - "grad_norm": 4.011229515075684, - "learning_rate": 3.8092080835941226e-05, - "loss": 0.6428, - "step": 80820 - }, - { - "epoch": 0.7145635531038385, - "grad_norm": 2.183293104171753, - "learning_rate": 3.809060744826936e-05, - "loss": 0.6736, - "step": 80830 - }, - { - "epoch": 0.7146519563641507, - "grad_norm": 4.528976917266846, - "learning_rate": 3.808913406059749e-05, - "loss": 0.645, - "step": 80840 - }, - { - "epoch": 0.7147403596244629, - "grad_norm": 6.955418109893799, - "learning_rate": 3.808766067292562e-05, - "loss": 0.6069, - "step": 80850 - }, - { - "epoch": 0.7148287628847751, - "grad_norm": 3.8275115489959717, - "learning_rate": 3.8086187285253747e-05, - "loss": 0.7253, - "step": 80860 - }, - { - "epoch": 0.7149171661450874, - "grad_norm": 5.179789066314697, - "learning_rate": 3.808471389758188e-05, - "loss": 0.6367, - "step": 80870 - }, - { - "epoch": 0.7150055694053997, - "grad_norm": 2.03383469581604, - "learning_rate": 3.808324050991e-05, - "loss": 0.755, - "step": 80880 - }, - { - "epoch": 0.715093972665712, - "grad_norm": 2.186598777770996, - "learning_rate": 3.808176712223814e-05, - "loss": 0.7652, - "step": 80890 - }, - { - "epoch": 0.7151823759260242, - "grad_norm": 4.560229778289795, - "learning_rate": 3.808029373456627e-05, - "loss": 0.6176, - "step": 80900 - }, - { - "epoch": 0.7152707791863364, - "grad_norm": 1.5504982471466064, - "learning_rate": 3.8078820346894395e-05, - "loss": 0.7417, - "step": 80910 - }, - { - "epoch": 0.7153591824466486, - "grad_norm": 1.2862977981567383, - "learning_rate": 3.8077346959222523e-05, - "loss": 0.636, - "step": 80920 - }, - { - "epoch": 0.7154475857069609, - "grad_norm": 2.8133223056793213, - "learning_rate": 3.807587357155065e-05, - "loss": 0.7344, - "step": 80930 - }, - { - "epoch": 0.7155359889672731, - "grad_norm": 3.200807809829712, - "learning_rate": 3.807440018387878e-05, - "loss": 0.7446, - "step": 80940 - }, - { - "epoch": 0.7156243922275853, - "grad_norm": 11.442713737487793, - "learning_rate": 3.8072926796206915e-05, - "loss": 0.77, - "step": 80950 - }, - { - "epoch": 0.7157127954878976, - "grad_norm": 1.8898372650146484, - "learning_rate": 3.8071453408535044e-05, - "loss": 0.6343, - "step": 80960 - }, - { - "epoch": 0.7158011987482098, - "grad_norm": 4.321465969085693, - "learning_rate": 3.806998002086317e-05, - "loss": 0.7081, - "step": 80970 - }, - { - "epoch": 0.715889602008522, - "grad_norm": 1.2546701431274414, - "learning_rate": 3.80685066331913e-05, - "loss": 0.6125, - "step": 80980 - }, - { - "epoch": 0.7159780052688343, - "grad_norm": 2.626376152038574, - "learning_rate": 3.806703324551943e-05, - "loss": 0.6884, - "step": 80990 - }, - { - "epoch": 0.7160664085291466, - "grad_norm": 1.4480594396591187, - "learning_rate": 3.806555985784756e-05, - "loss": 0.6814, - "step": 81000 - }, - { - "epoch": 0.7161548117894588, - "grad_norm": 3.1579535007476807, - "learning_rate": 3.8064086470175685e-05, - "loss": 0.6906, - "step": 81010 - }, - { - "epoch": 0.7162432150497711, - "grad_norm": 1.7976185083389282, - "learning_rate": 3.806261308250382e-05, - "loss": 0.6705, - "step": 81020 - }, - { - "epoch": 0.7163316183100833, - "grad_norm": 1.3817342519760132, - "learning_rate": 3.806113969483195e-05, - "loss": 0.7549, - "step": 81030 - }, - { - "epoch": 0.7164200215703955, - "grad_norm": 1.3976129293441772, - "learning_rate": 3.805966630716008e-05, - "loss": 0.6479, - "step": 81040 - }, - { - "epoch": 0.7165084248307078, - "grad_norm": 3.1552183628082275, - "learning_rate": 3.8058192919488206e-05, - "loss": 0.6905, - "step": 81050 - }, - { - "epoch": 0.71659682809102, - "grad_norm": 6.283140659332275, - "learning_rate": 3.8056719531816334e-05, - "loss": 0.5697, - "step": 81060 - }, - { - "epoch": 0.7166852313513322, - "grad_norm": 3.000025510787964, - "learning_rate": 3.805524614414446e-05, - "loss": 0.7089, - "step": 81070 - }, - { - "epoch": 0.7167736346116445, - "grad_norm": 3.9269211292266846, - "learning_rate": 3.80537727564726e-05, - "loss": 0.659, - "step": 81080 - }, - { - "epoch": 0.7168620378719567, - "grad_norm": 1.69406259059906, - "learning_rate": 3.8052299368800726e-05, - "loss": 0.7369, - "step": 81090 - }, - { - "epoch": 0.7169504411322689, - "grad_norm": 2.697906017303467, - "learning_rate": 3.8050825981128854e-05, - "loss": 0.6946, - "step": 81100 - }, - { - "epoch": 0.7170388443925813, - "grad_norm": 3.463474750518799, - "learning_rate": 3.804935259345698e-05, - "loss": 0.6458, - "step": 81110 - }, - { - "epoch": 0.7171272476528935, - "grad_norm": 1.4377788305282593, - "learning_rate": 3.804787920578511e-05, - "loss": 0.7313, - "step": 81120 - }, - { - "epoch": 0.7172156509132057, - "grad_norm": 4.773625373840332, - "learning_rate": 3.804640581811324e-05, - "loss": 0.598, - "step": 81130 - }, - { - "epoch": 0.717304054173518, - "grad_norm": 4.413767337799072, - "learning_rate": 3.8044932430441374e-05, - "loss": 0.6287, - "step": 81140 - }, - { - "epoch": 0.7173924574338302, - "grad_norm": 2.012118101119995, - "learning_rate": 3.8043459042769496e-05, - "loss": 0.664, - "step": 81150 - }, - { - "epoch": 0.7174808606941424, - "grad_norm": 3.5045692920684814, - "learning_rate": 3.804198565509763e-05, - "loss": 0.6162, - "step": 81160 - }, - { - "epoch": 0.7175692639544546, - "grad_norm": 5.1168999671936035, - "learning_rate": 3.804051226742576e-05, - "loss": 0.6448, - "step": 81170 - }, - { - "epoch": 0.7176576672147669, - "grad_norm": 8.003167152404785, - "learning_rate": 3.803903887975389e-05, - "loss": 0.7621, - "step": 81180 - }, - { - "epoch": 0.7177460704750791, - "grad_norm": 1.8028844594955444, - "learning_rate": 3.8037565492082016e-05, - "loss": 0.6371, - "step": 81190 - }, - { - "epoch": 0.7178344737353913, - "grad_norm": 3.978248357772827, - "learning_rate": 3.803609210441015e-05, - "loss": 0.6651, - "step": 81200 - }, - { - "epoch": 0.7179228769957036, - "grad_norm": 1.696570634841919, - "learning_rate": 3.803461871673827e-05, - "loss": 0.6472, - "step": 81210 - }, - { - "epoch": 0.7180112802560158, - "grad_norm": 1.6071555614471436, - "learning_rate": 3.803314532906641e-05, - "loss": 0.589, - "step": 81220 - }, - { - "epoch": 0.7180996835163281, - "grad_norm": 1.5361149311065674, - "learning_rate": 3.803167194139453e-05, - "loss": 0.5857, - "step": 81230 - }, - { - "epoch": 0.7181880867766404, - "grad_norm": 1.8883726596832275, - "learning_rate": 3.8030198553722665e-05, - "loss": 0.6103, - "step": 81240 - }, - { - "epoch": 0.7182764900369526, - "grad_norm": 1.2080509662628174, - "learning_rate": 3.802872516605079e-05, - "loss": 0.5576, - "step": 81250 - }, - { - "epoch": 0.7183648932972648, - "grad_norm": 2.866602897644043, - "learning_rate": 3.802725177837892e-05, - "loss": 0.6364, - "step": 81260 - }, - { - "epoch": 0.7184532965575771, - "grad_norm": 2.0462229251861572, - "learning_rate": 3.802577839070705e-05, - "loss": 0.7218, - "step": 81270 - }, - { - "epoch": 0.7185416998178893, - "grad_norm": 1.7809785604476929, - "learning_rate": 3.8024305003035185e-05, - "loss": 0.6613, - "step": 81280 - }, - { - "epoch": 0.7186301030782015, - "grad_norm": 1.510802149772644, - "learning_rate": 3.8022831615363306e-05, - "loss": 0.7402, - "step": 81290 - }, - { - "epoch": 0.7187185063385138, - "grad_norm": 6.516298294067383, - "learning_rate": 3.802135822769144e-05, - "loss": 0.8887, - "step": 81300 - }, - { - "epoch": 0.718806909598826, - "grad_norm": 3.8273253440856934, - "learning_rate": 3.801988484001957e-05, - "loss": 0.6008, - "step": 81310 - }, - { - "epoch": 0.7188953128591382, - "grad_norm": 1.5741015672683716, - "learning_rate": 3.80184114523477e-05, - "loss": 0.6555, - "step": 81320 - }, - { - "epoch": 0.7189837161194504, - "grad_norm": 2.636172294616699, - "learning_rate": 3.8016938064675827e-05, - "loss": 0.7384, - "step": 81330 - }, - { - "epoch": 0.7190721193797627, - "grad_norm": 4.42274284362793, - "learning_rate": 3.801546467700396e-05, - "loss": 0.6152, - "step": 81340 - }, - { - "epoch": 0.719160522640075, - "grad_norm": 3.990666627883911, - "learning_rate": 3.801399128933208e-05, - "loss": 0.6806, - "step": 81350 - }, - { - "epoch": 0.7192489259003872, - "grad_norm": 4.11974573135376, - "learning_rate": 3.801251790166022e-05, - "loss": 0.6603, - "step": 81360 - }, - { - "epoch": 0.7193373291606995, - "grad_norm": 1.5518118143081665, - "learning_rate": 3.801104451398834e-05, - "loss": 0.656, - "step": 81370 - }, - { - "epoch": 0.7194257324210117, - "grad_norm": 1.8162482976913452, - "learning_rate": 3.8009571126316475e-05, - "loss": 0.6368, - "step": 81380 - }, - { - "epoch": 0.7195141356813239, - "grad_norm": 13.060083389282227, - "learning_rate": 3.8008097738644604e-05, - "loss": 0.6244, - "step": 81390 - }, - { - "epoch": 0.7196025389416362, - "grad_norm": 1.791451096534729, - "learning_rate": 3.800662435097273e-05, - "loss": 0.6481, - "step": 81400 - }, - { - "epoch": 0.7196909422019484, - "grad_norm": 2.118098735809326, - "learning_rate": 3.800515096330086e-05, - "loss": 0.7088, - "step": 81410 - }, - { - "epoch": 0.7197793454622606, - "grad_norm": 1.72208833694458, - "learning_rate": 3.8003677575628995e-05, - "loss": 0.6061, - "step": 81420 - }, - { - "epoch": 0.7198677487225729, - "grad_norm": 5.208735466003418, - "learning_rate": 3.800220418795712e-05, - "loss": 0.6558, - "step": 81430 - }, - { - "epoch": 0.7199561519828851, - "grad_norm": 16.971261978149414, - "learning_rate": 3.800073080028525e-05, - "loss": 0.7517, - "step": 81440 - }, - { - "epoch": 0.7200445552431973, - "grad_norm": 1.423710584640503, - "learning_rate": 3.799925741261338e-05, - "loss": 0.5638, - "step": 81450 - }, - { - "epoch": 0.7201329585035096, - "grad_norm": 10.31123161315918, - "learning_rate": 3.799778402494151e-05, - "loss": 0.6973, - "step": 81460 - }, - { - "epoch": 0.7202213617638219, - "grad_norm": 2.5266685485839844, - "learning_rate": 3.799631063726964e-05, - "loss": 0.5689, - "step": 81470 - }, - { - "epoch": 0.7203097650241341, - "grad_norm": 1.584389567375183, - "learning_rate": 3.7994837249597765e-05, - "loss": 0.6623, - "step": 81480 - }, - { - "epoch": 0.7203981682844464, - "grad_norm": 5.045012474060059, - "learning_rate": 3.7993363861925894e-05, - "loss": 0.5619, - "step": 81490 - }, - { - "epoch": 0.7204865715447586, - "grad_norm": 1.9340811967849731, - "learning_rate": 3.799189047425403e-05, - "loss": 0.6351, - "step": 81500 - }, - { - "epoch": 0.7205749748050708, - "grad_norm": 2.4334867000579834, - "learning_rate": 3.799041708658215e-05, - "loss": 0.6287, - "step": 81510 - }, - { - "epoch": 0.720663378065383, - "grad_norm": 3.7819254398345947, - "learning_rate": 3.7988943698910286e-05, - "loss": 0.6718, - "step": 81520 - }, - { - "epoch": 0.7207517813256953, - "grad_norm": 2.841998815536499, - "learning_rate": 3.7987470311238414e-05, - "loss": 0.6195, - "step": 81530 - }, - { - "epoch": 0.7208401845860075, - "grad_norm": 1.7949743270874023, - "learning_rate": 3.798599692356654e-05, - "loss": 0.6644, - "step": 81540 - }, - { - "epoch": 0.7209285878463197, - "grad_norm": 6.6643853187561035, - "learning_rate": 3.798452353589467e-05, - "loss": 0.6357, - "step": 81550 - }, - { - "epoch": 0.721016991106632, - "grad_norm": 1.5048494338989258, - "learning_rate": 3.7983050148222806e-05, - "loss": 0.6431, - "step": 81560 - }, - { - "epoch": 0.7211053943669442, - "grad_norm": 2.609926700592041, - "learning_rate": 3.798157676055093e-05, - "loss": 0.7046, - "step": 81570 - }, - { - "epoch": 0.7211937976272564, - "grad_norm": 5.734683990478516, - "learning_rate": 3.798010337287906e-05, - "loss": 0.6196, - "step": 81580 - }, - { - "epoch": 0.7212822008875688, - "grad_norm": 3.050089120864868, - "learning_rate": 3.7978629985207184e-05, - "loss": 0.7234, - "step": 81590 - }, - { - "epoch": 0.721370604147881, - "grad_norm": 2.5078630447387695, - "learning_rate": 3.797715659753532e-05, - "loss": 0.7657, - "step": 81600 - }, - { - "epoch": 0.7214590074081932, - "grad_norm": 2.299607276916504, - "learning_rate": 3.797568320986345e-05, - "loss": 0.7014, - "step": 81610 - }, - { - "epoch": 0.7215474106685055, - "grad_norm": 4.529541492462158, - "learning_rate": 3.7974209822191576e-05, - "loss": 0.6828, - "step": 81620 - }, - { - "epoch": 0.7216358139288177, - "grad_norm": 5.3901448249816895, - "learning_rate": 3.7972736434519704e-05, - "loss": 0.6141, - "step": 81630 - }, - { - "epoch": 0.7217242171891299, - "grad_norm": 2.1895015239715576, - "learning_rate": 3.797126304684784e-05, - "loss": 0.6778, - "step": 81640 - }, - { - "epoch": 0.7218126204494422, - "grad_norm": 5.135331153869629, - "learning_rate": 3.796978965917596e-05, - "loss": 0.8217, - "step": 81650 - }, - { - "epoch": 0.7219010237097544, - "grad_norm": 1.9905357360839844, - "learning_rate": 3.7968316271504096e-05, - "loss": 0.6438, - "step": 81660 - }, - { - "epoch": 0.7219894269700666, - "grad_norm": 13.713616371154785, - "learning_rate": 3.7966842883832225e-05, - "loss": 0.7791, - "step": 81670 - }, - { - "epoch": 0.7220778302303789, - "grad_norm": 3.2609429359436035, - "learning_rate": 3.796536949616035e-05, - "loss": 0.6227, - "step": 81680 - }, - { - "epoch": 0.7221662334906911, - "grad_norm": 2.5888688564300537, - "learning_rate": 3.796389610848848e-05, - "loss": 0.5572, - "step": 81690 - }, - { - "epoch": 0.7222546367510034, - "grad_norm": 1.1970055103302002, - "learning_rate": 3.796242272081661e-05, - "loss": 0.5683, - "step": 81700 - }, - { - "epoch": 0.7223430400113157, - "grad_norm": 7.1145453453063965, - "learning_rate": 3.796094933314474e-05, - "loss": 0.5811, - "step": 81710 - }, - { - "epoch": 0.7224314432716279, - "grad_norm": 4.881281852722168, - "learning_rate": 3.795947594547287e-05, - "loss": 0.7372, - "step": 81720 - }, - { - "epoch": 0.7225198465319401, - "grad_norm": 4.914188861846924, - "learning_rate": 3.7958002557800995e-05, - "loss": 0.4989, - "step": 81730 - }, - { - "epoch": 0.7226082497922524, - "grad_norm": 4.660118579864502, - "learning_rate": 3.795652917012913e-05, - "loss": 0.6057, - "step": 81740 - }, - { - "epoch": 0.7226966530525646, - "grad_norm": 1.2871594429016113, - "learning_rate": 3.795505578245726e-05, - "loss": 0.5653, - "step": 81750 - }, - { - "epoch": 0.7227850563128768, - "grad_norm": 2.805406332015991, - "learning_rate": 3.7953582394785386e-05, - "loss": 0.7284, - "step": 81760 - }, - { - "epoch": 0.722873459573189, - "grad_norm": 6.828197956085205, - "learning_rate": 3.7952109007113515e-05, - "loss": 0.7238, - "step": 81770 - }, - { - "epoch": 0.7229618628335013, - "grad_norm": 5.44737434387207, - "learning_rate": 3.795063561944165e-05, - "loss": 0.6699, - "step": 81780 - }, - { - "epoch": 0.7230502660938135, - "grad_norm": 1.4210898876190186, - "learning_rate": 3.794916223176977e-05, - "loss": 0.602, - "step": 81790 - }, - { - "epoch": 0.7231386693541257, - "grad_norm": 3.5432026386260986, - "learning_rate": 3.794768884409791e-05, - "loss": 0.7718, - "step": 81800 - }, - { - "epoch": 0.723227072614438, - "grad_norm": 3.968459367752075, - "learning_rate": 3.7946215456426035e-05, - "loss": 0.6815, - "step": 81810 - }, - { - "epoch": 0.7233154758747503, - "grad_norm": 3.1968178749084473, - "learning_rate": 3.794474206875416e-05, - "loss": 0.7348, - "step": 81820 - }, - { - "epoch": 0.7234038791350625, - "grad_norm": 2.341172218322754, - "learning_rate": 3.794326868108229e-05, - "loss": 0.5412, - "step": 81830 - }, - { - "epoch": 0.7234922823953748, - "grad_norm": 2.320580005645752, - "learning_rate": 3.794179529341042e-05, - "loss": 0.7299, - "step": 81840 - }, - { - "epoch": 0.723580685655687, - "grad_norm": 3.793396234512329, - "learning_rate": 3.794032190573855e-05, - "loss": 0.7211, - "step": 81850 - }, - { - "epoch": 0.7236690889159992, - "grad_norm": 7.644009113311768, - "learning_rate": 3.7938848518066684e-05, - "loss": 0.6952, - "step": 81860 - }, - { - "epoch": 0.7237574921763115, - "grad_norm": 5.067923069000244, - "learning_rate": 3.793737513039481e-05, - "loss": 0.735, - "step": 81870 - }, - { - "epoch": 0.7238458954366237, - "grad_norm": 7.491464138031006, - "learning_rate": 3.793590174272294e-05, - "loss": 0.6496, - "step": 81880 - }, - { - "epoch": 0.7239342986969359, - "grad_norm": 3.3565855026245117, - "learning_rate": 3.793442835505107e-05, - "loss": 0.6818, - "step": 81890 - }, - { - "epoch": 0.7240227019572482, - "grad_norm": 2.9698851108551025, - "learning_rate": 3.79329549673792e-05, - "loss": 0.6887, - "step": 81900 - }, - { - "epoch": 0.7241111052175604, - "grad_norm": 3.4320688247680664, - "learning_rate": 3.7931481579707325e-05, - "loss": 0.7373, - "step": 81910 - }, - { - "epoch": 0.7241995084778726, - "grad_norm": 4.822113513946533, - "learning_rate": 3.793000819203546e-05, - "loss": 0.6452, - "step": 81920 - }, - { - "epoch": 0.7242879117381849, - "grad_norm": 2.3925328254699707, - "learning_rate": 3.792853480436359e-05, - "loss": 0.6385, - "step": 81930 - }, - { - "epoch": 0.7243763149984972, - "grad_norm": 7.681417942047119, - "learning_rate": 3.792706141669172e-05, - "loss": 0.6281, - "step": 81940 - }, - { - "epoch": 0.7244647182588094, - "grad_norm": 6.202743053436279, - "learning_rate": 3.7925588029019846e-05, - "loss": 0.689, - "step": 81950 - }, - { - "epoch": 0.7245531215191217, - "grad_norm": 4.275755882263184, - "learning_rate": 3.7924114641347974e-05, - "loss": 0.6615, - "step": 81960 - }, - { - "epoch": 0.7246415247794339, - "grad_norm": 1.701641321182251, - "learning_rate": 3.79226412536761e-05, - "loss": 0.6777, - "step": 81970 - }, - { - "epoch": 0.7247299280397461, - "grad_norm": 3.189011812210083, - "learning_rate": 3.792116786600423e-05, - "loss": 0.7767, - "step": 81980 - }, - { - "epoch": 0.7248183313000583, - "grad_norm": 4.4323272705078125, - "learning_rate": 3.7919694478332366e-05, - "loss": 0.6551, - "step": 81990 - }, - { - "epoch": 0.7249067345603706, - "grad_norm": 3.3805644512176514, - "learning_rate": 3.7918221090660494e-05, - "loss": 0.6323, - "step": 82000 - }, - { - "epoch": 0.7249951378206828, - "grad_norm": 1.8189047574996948, - "learning_rate": 3.791674770298862e-05, - "loss": 0.6673, - "step": 82010 - }, - { - "epoch": 0.725083541080995, - "grad_norm": 3.39742112159729, - "learning_rate": 3.791527431531675e-05, - "loss": 0.7428, - "step": 82020 - }, - { - "epoch": 0.7251719443413073, - "grad_norm": 8.843940734863281, - "learning_rate": 3.791380092764488e-05, - "loss": 0.641, - "step": 82030 - }, - { - "epoch": 0.7252603476016195, - "grad_norm": 2.9795138835906982, - "learning_rate": 3.791232753997301e-05, - "loss": 0.6441, - "step": 82040 - }, - { - "epoch": 0.7253487508619317, - "grad_norm": 1.0929025411605835, - "learning_rate": 3.791085415230114e-05, - "loss": 0.6991, - "step": 82050 - }, - { - "epoch": 0.7254371541222441, - "grad_norm": 0.9400179386138916, - "learning_rate": 3.7909380764629264e-05, - "loss": 0.6448, - "step": 82060 - }, - { - "epoch": 0.7255255573825563, - "grad_norm": 5.6500773429870605, - "learning_rate": 3.79079073769574e-05, - "loss": 0.7272, - "step": 82070 - }, - { - "epoch": 0.7256139606428685, - "grad_norm": 12.625765800476074, - "learning_rate": 3.790643398928553e-05, - "loss": 0.6164, - "step": 82080 - }, - { - "epoch": 0.7257023639031808, - "grad_norm": 2.4194607734680176, - "learning_rate": 3.7904960601613656e-05, - "loss": 0.6672, - "step": 82090 - }, - { - "epoch": 0.725790767163493, - "grad_norm": 4.919355869293213, - "learning_rate": 3.7903487213941784e-05, - "loss": 0.6211, - "step": 82100 - }, - { - "epoch": 0.7258791704238052, - "grad_norm": 1.40232515335083, - "learning_rate": 3.790201382626992e-05, - "loss": 0.5534, - "step": 82110 - }, - { - "epoch": 0.7259675736841175, - "grad_norm": 0.8732109069824219, - "learning_rate": 3.790054043859804e-05, - "loss": 0.6815, - "step": 82120 - }, - { - "epoch": 0.7260559769444297, - "grad_norm": 1.5525437593460083, - "learning_rate": 3.7899067050926176e-05, - "loss": 0.8336, - "step": 82130 - }, - { - "epoch": 0.7261443802047419, - "grad_norm": 2.76408314704895, - "learning_rate": 3.7897593663254305e-05, - "loss": 0.7518, - "step": 82140 - }, - { - "epoch": 0.7262327834650542, - "grad_norm": 2.739830255508423, - "learning_rate": 3.789612027558243e-05, - "loss": 0.5977, - "step": 82150 - }, - { - "epoch": 0.7263211867253664, - "grad_norm": 1.6819652318954468, - "learning_rate": 3.789464688791056e-05, - "loss": 0.6824, - "step": 82160 - }, - { - "epoch": 0.7264095899856786, - "grad_norm": 3.837034225463867, - "learning_rate": 3.7893173500238696e-05, - "loss": 0.614, - "step": 82170 - }, - { - "epoch": 0.726497993245991, - "grad_norm": 2.2636003494262695, - "learning_rate": 3.789170011256682e-05, - "loss": 0.8269, - "step": 82180 - }, - { - "epoch": 0.7265863965063032, - "grad_norm": 2.8763487339019775, - "learning_rate": 3.789022672489495e-05, - "loss": 0.6978, - "step": 82190 - }, - { - "epoch": 0.7266747997666154, - "grad_norm": 1.058937668800354, - "learning_rate": 3.7888753337223075e-05, - "loss": 0.714, - "step": 82200 - }, - { - "epoch": 0.7267632030269277, - "grad_norm": 2.0732810497283936, - "learning_rate": 3.788727994955121e-05, - "loss": 0.6954, - "step": 82210 - }, - { - "epoch": 0.7268516062872399, - "grad_norm": 11.375685691833496, - "learning_rate": 3.788580656187934e-05, - "loss": 0.6545, - "step": 82220 - }, - { - "epoch": 0.7269400095475521, - "grad_norm": 1.0989155769348145, - "learning_rate": 3.7884333174207467e-05, - "loss": 0.6257, - "step": 82230 - }, - { - "epoch": 0.7270284128078643, - "grad_norm": 1.6817965507507324, - "learning_rate": 3.7882859786535595e-05, - "loss": 0.4865, - "step": 82240 - }, - { - "epoch": 0.7271168160681766, - "grad_norm": 6.18862247467041, - "learning_rate": 3.788138639886373e-05, - "loss": 0.7283, - "step": 82250 - }, - { - "epoch": 0.7272052193284888, - "grad_norm": 16.718080520629883, - "learning_rate": 3.787991301119185e-05, - "loss": 0.7228, - "step": 82260 - }, - { - "epoch": 0.727293622588801, - "grad_norm": 3.0961620807647705, - "learning_rate": 3.787843962351999e-05, - "loss": 0.7523, - "step": 82270 - }, - { - "epoch": 0.7273820258491133, - "grad_norm": 16.639848709106445, - "learning_rate": 3.7876966235848115e-05, - "loss": 0.6769, - "step": 82280 - }, - { - "epoch": 0.7274704291094256, - "grad_norm": 9.027290344238281, - "learning_rate": 3.7875492848176243e-05, - "loss": 0.6618, - "step": 82290 - }, - { - "epoch": 0.7275588323697378, - "grad_norm": 4.046119213104248, - "learning_rate": 3.787401946050437e-05, - "loss": 0.768, - "step": 82300 - }, - { - "epoch": 0.7276472356300501, - "grad_norm": 2.05656361579895, - "learning_rate": 3.78725460728325e-05, - "loss": 0.7273, - "step": 82310 - }, - { - "epoch": 0.7277356388903623, - "grad_norm": 2.0593655109405518, - "learning_rate": 3.787107268516063e-05, - "loss": 0.7483, - "step": 82320 - }, - { - "epoch": 0.7278240421506745, - "grad_norm": 3.8659236431121826, - "learning_rate": 3.7869599297488764e-05, - "loss": 0.796, - "step": 82330 - }, - { - "epoch": 0.7279124454109868, - "grad_norm": 2.786943197250366, - "learning_rate": 3.7868125909816885e-05, - "loss": 0.678, - "step": 82340 - }, - { - "epoch": 0.728000848671299, - "grad_norm": 4.119709491729736, - "learning_rate": 3.786665252214502e-05, - "loss": 0.6518, - "step": 82350 - }, - { - "epoch": 0.7280892519316112, - "grad_norm": 2.0500283241271973, - "learning_rate": 3.786517913447315e-05, - "loss": 0.5561, - "step": 82360 - }, - { - "epoch": 0.7281776551919235, - "grad_norm": 3.5243289470672607, - "learning_rate": 3.786370574680128e-05, - "loss": 0.7494, - "step": 82370 - }, - { - "epoch": 0.7282660584522357, - "grad_norm": 5.052689075469971, - "learning_rate": 3.7862232359129405e-05, - "loss": 0.713, - "step": 82380 - }, - { - "epoch": 0.7283544617125479, - "grad_norm": 1.687080979347229, - "learning_rate": 3.786075897145754e-05, - "loss": 0.6209, - "step": 82390 - }, - { - "epoch": 0.7284428649728601, - "grad_norm": 1.6656676530838013, - "learning_rate": 3.785928558378566e-05, - "loss": 0.6535, - "step": 82400 - }, - { - "epoch": 0.7285312682331725, - "grad_norm": 2.815579652786255, - "learning_rate": 3.78578121961138e-05, - "loss": 0.555, - "step": 82410 - }, - { - "epoch": 0.7286196714934847, - "grad_norm": 9.232556343078613, - "learning_rate": 3.785633880844192e-05, - "loss": 0.6742, - "step": 82420 - }, - { - "epoch": 0.728708074753797, - "grad_norm": 5.655826568603516, - "learning_rate": 3.7854865420770054e-05, - "loss": 0.7752, - "step": 82430 - }, - { - "epoch": 0.7287964780141092, - "grad_norm": 5.988077640533447, - "learning_rate": 3.785339203309818e-05, - "loss": 0.6712, - "step": 82440 - }, - { - "epoch": 0.7288848812744214, - "grad_norm": 0.9591336846351624, - "learning_rate": 3.785191864542631e-05, - "loss": 0.5992, - "step": 82450 - }, - { - "epoch": 0.7289732845347336, - "grad_norm": 1.510759711265564, - "learning_rate": 3.785044525775444e-05, - "loss": 0.6131, - "step": 82460 - }, - { - "epoch": 0.7290616877950459, - "grad_norm": 13.347774505615234, - "learning_rate": 3.7848971870082574e-05, - "loss": 0.5697, - "step": 82470 - }, - { - "epoch": 0.7291500910553581, - "grad_norm": 2.2034530639648438, - "learning_rate": 3.7847498482410696e-05, - "loss": 0.8204, - "step": 82480 - }, - { - "epoch": 0.7292384943156703, - "grad_norm": 2.9784910678863525, - "learning_rate": 3.784602509473883e-05, - "loss": 0.775, - "step": 82490 - }, - { - "epoch": 0.7293268975759826, - "grad_norm": 4.6114397048950195, - "learning_rate": 3.784455170706696e-05, - "loss": 0.7688, - "step": 82500 - }, - { - "epoch": 0.7294153008362948, - "grad_norm": 7.151790142059326, - "learning_rate": 3.784307831939509e-05, - "loss": 0.6117, - "step": 82510 - }, - { - "epoch": 0.729503704096607, - "grad_norm": 4.189168453216553, - "learning_rate": 3.7841604931723216e-05, - "loss": 0.6043, - "step": 82520 - }, - { - "epoch": 0.7295921073569194, - "grad_norm": 1.0064668655395508, - "learning_rate": 3.7840131544051344e-05, - "loss": 0.5606, - "step": 82530 - }, - { - "epoch": 0.7296805106172316, - "grad_norm": 3.5928053855895996, - "learning_rate": 3.783865815637947e-05, - "loss": 0.6251, - "step": 82540 - }, - { - "epoch": 0.7297689138775438, - "grad_norm": 1.864996075630188, - "learning_rate": 3.783718476870761e-05, - "loss": 0.5181, - "step": 82550 - }, - { - "epoch": 0.7298573171378561, - "grad_norm": 1.9529139995574951, - "learning_rate": 3.783571138103573e-05, - "loss": 0.5528, - "step": 82560 - }, - { - "epoch": 0.7299457203981683, - "grad_norm": 7.490105152130127, - "learning_rate": 3.7834237993363864e-05, - "loss": 0.7557, - "step": 82570 - }, - { - "epoch": 0.7300341236584805, - "grad_norm": 1.795874834060669, - "learning_rate": 3.783276460569199e-05, - "loss": 0.7436, - "step": 82580 - }, - { - "epoch": 0.7301225269187928, - "grad_norm": 3.031679630279541, - "learning_rate": 3.783129121802012e-05, - "loss": 0.5611, - "step": 82590 - }, - { - "epoch": 0.730210930179105, - "grad_norm": 2.3319554328918457, - "learning_rate": 3.782981783034825e-05, - "loss": 0.7378, - "step": 82600 - }, - { - "epoch": 0.7302993334394172, - "grad_norm": 1.281991958618164, - "learning_rate": 3.7828344442676385e-05, - "loss": 0.4882, - "step": 82610 - }, - { - "epoch": 0.7303877366997295, - "grad_norm": 1.561753273010254, - "learning_rate": 3.7826871055004506e-05, - "loss": 0.75, - "step": 82620 - }, - { - "epoch": 0.7304761399600417, - "grad_norm": 3.4038097858428955, - "learning_rate": 3.782539766733264e-05, - "loss": 0.7109, - "step": 82630 - }, - { - "epoch": 0.7305645432203539, - "grad_norm": 1.3466259241104126, - "learning_rate": 3.782392427966077e-05, - "loss": 0.604, - "step": 82640 - }, - { - "epoch": 0.7306529464806663, - "grad_norm": 3.2525291442871094, - "learning_rate": 3.78224508919889e-05, - "loss": 0.6011, - "step": 82650 - }, - { - "epoch": 0.7307413497409785, - "grad_norm": 1.857112169265747, - "learning_rate": 3.7820977504317026e-05, - "loss": 0.6465, - "step": 82660 - }, - { - "epoch": 0.7308297530012907, - "grad_norm": 2.114192247390747, - "learning_rate": 3.7819504116645155e-05, - "loss": 0.6263, - "step": 82670 - }, - { - "epoch": 0.730918156261603, - "grad_norm": 2.1825144290924072, - "learning_rate": 3.781803072897328e-05, - "loss": 0.7194, - "step": 82680 - }, - { - "epoch": 0.7310065595219152, - "grad_norm": 1.5394127368927002, - "learning_rate": 3.781655734130142e-05, - "loss": 0.7264, - "step": 82690 - }, - { - "epoch": 0.7310949627822274, - "grad_norm": 2.3071682453155518, - "learning_rate": 3.781508395362954e-05, - "loss": 0.816, - "step": 82700 - }, - { - "epoch": 0.7311833660425396, - "grad_norm": 2.225712776184082, - "learning_rate": 3.7813610565957675e-05, - "loss": 0.7076, - "step": 82710 - }, - { - "epoch": 0.7312717693028519, - "grad_norm": 1.7534329891204834, - "learning_rate": 3.78121371782858e-05, - "loss": 0.6413, - "step": 82720 - }, - { - "epoch": 0.7313601725631641, - "grad_norm": 2.1758055686950684, - "learning_rate": 3.781066379061393e-05, - "loss": 0.644, - "step": 82730 - }, - { - "epoch": 0.7314485758234763, - "grad_norm": 6.965531349182129, - "learning_rate": 3.780919040294206e-05, - "loss": 0.7453, - "step": 82740 - }, - { - "epoch": 0.7315369790837886, - "grad_norm": 7.385221004486084, - "learning_rate": 3.7807717015270195e-05, - "loss": 0.7283, - "step": 82750 - }, - { - "epoch": 0.7316253823441009, - "grad_norm": 1.3409184217453003, - "learning_rate": 3.780624362759832e-05, - "loss": 0.6674, - "step": 82760 - }, - { - "epoch": 0.7317137856044131, - "grad_norm": 2.5681004524230957, - "learning_rate": 3.780477023992645e-05, - "loss": 0.6004, - "step": 82770 - }, - { - "epoch": 0.7318021888647254, - "grad_norm": 2.5477404594421387, - "learning_rate": 3.780329685225458e-05, - "loss": 0.6222, - "step": 82780 - }, - { - "epoch": 0.7318905921250376, - "grad_norm": 4.637944221496582, - "learning_rate": 3.780182346458271e-05, - "loss": 0.6573, - "step": 82790 - }, - { - "epoch": 0.7319789953853498, - "grad_norm": 1.322871208190918, - "learning_rate": 3.780035007691084e-05, - "loss": 0.646, - "step": 82800 - }, - { - "epoch": 0.7320673986456621, - "grad_norm": 1.9769188165664673, - "learning_rate": 3.7798876689238965e-05, - "loss": 0.6736, - "step": 82810 - }, - { - "epoch": 0.7321558019059743, - "grad_norm": 2.6647613048553467, - "learning_rate": 3.7797403301567094e-05, - "loss": 0.7257, - "step": 82820 - }, - { - "epoch": 0.7322442051662865, - "grad_norm": 9.220582008361816, - "learning_rate": 3.779592991389523e-05, - "loss": 0.7633, - "step": 82830 - }, - { - "epoch": 0.7323326084265988, - "grad_norm": 1.625688076019287, - "learning_rate": 3.779445652622336e-05, - "loss": 0.7025, - "step": 82840 - }, - { - "epoch": 0.732421011686911, - "grad_norm": 4.385787487030029, - "learning_rate": 3.7792983138551485e-05, - "loss": 0.8389, - "step": 82850 - }, - { - "epoch": 0.7325094149472232, - "grad_norm": 1.5261093378067017, - "learning_rate": 3.7791509750879614e-05, - "loss": 0.6195, - "step": 82860 - }, - { - "epoch": 0.7325978182075354, - "grad_norm": 2.3391811847686768, - "learning_rate": 3.779003636320774e-05, - "loss": 0.5418, - "step": 82870 - }, - { - "epoch": 0.7326862214678478, - "grad_norm": 3.247581958770752, - "learning_rate": 3.778856297553587e-05, - "loss": 0.6039, - "step": 82880 - }, - { - "epoch": 0.73277462472816, - "grad_norm": 1.761211633682251, - "learning_rate": 3.7787089587864e-05, - "loss": 0.6608, - "step": 82890 - }, - { - "epoch": 0.7328630279884722, - "grad_norm": 6.203659534454346, - "learning_rate": 3.7785616200192134e-05, - "loss": 0.7611, - "step": 82900 - }, - { - "epoch": 0.7329514312487845, - "grad_norm": 2.116152763366699, - "learning_rate": 3.778414281252026e-05, - "loss": 0.6829, - "step": 82910 - }, - { - "epoch": 0.7330398345090967, - "grad_norm": 7.488345623016357, - "learning_rate": 3.778266942484839e-05, - "loss": 0.6787, - "step": 82920 - }, - { - "epoch": 0.7331282377694089, - "grad_norm": 3.8275585174560547, - "learning_rate": 3.778119603717652e-05, - "loss": 0.6351, - "step": 82930 - }, - { - "epoch": 0.7332166410297212, - "grad_norm": 3.363858938217163, - "learning_rate": 3.777972264950465e-05, - "loss": 0.6049, - "step": 82940 - }, - { - "epoch": 0.7333050442900334, - "grad_norm": 1.35905921459198, - "learning_rate": 3.7778249261832776e-05, - "loss": 0.585, - "step": 82950 - }, - { - "epoch": 0.7333934475503456, - "grad_norm": 6.123122692108154, - "learning_rate": 3.777677587416091e-05, - "loss": 0.6661, - "step": 82960 - }, - { - "epoch": 0.7334818508106579, - "grad_norm": 3.0487587451934814, - "learning_rate": 3.777530248648904e-05, - "loss": 0.665, - "step": 82970 - }, - { - "epoch": 0.7335702540709701, - "grad_norm": 7.3421549797058105, - "learning_rate": 3.777382909881717e-05, - "loss": 0.6879, - "step": 82980 - }, - { - "epoch": 0.7336586573312823, - "grad_norm": 1.6624705791473389, - "learning_rate": 3.7772355711145296e-05, - "loss": 0.6313, - "step": 82990 - }, - { - "epoch": 0.7337470605915947, - "grad_norm": 4.164744853973389, - "learning_rate": 3.7770882323473424e-05, - "loss": 0.6981, - "step": 83000 - }, - { - "epoch": 0.7338354638519069, - "grad_norm": 7.852123260498047, - "learning_rate": 3.776940893580155e-05, - "loss": 0.7305, - "step": 83010 - }, - { - "epoch": 0.7339238671122191, - "grad_norm": 7.19644832611084, - "learning_rate": 3.776793554812969e-05, - "loss": 0.6751, - "step": 83020 - }, - { - "epoch": 0.7340122703725314, - "grad_norm": 5.935701847076416, - "learning_rate": 3.776646216045781e-05, - "loss": 0.5763, - "step": 83030 - }, - { - "epoch": 0.7341006736328436, - "grad_norm": 8.278578758239746, - "learning_rate": 3.7764988772785945e-05, - "loss": 0.7584, - "step": 83040 - }, - { - "epoch": 0.7341890768931558, - "grad_norm": 1.1739051342010498, - "learning_rate": 3.776351538511407e-05, - "loss": 0.6151, - "step": 83050 - }, - { - "epoch": 0.734277480153468, - "grad_norm": 1.8704761266708374, - "learning_rate": 3.77620419974422e-05, - "loss": 0.6379, - "step": 83060 - }, - { - "epoch": 0.7343658834137803, - "grad_norm": 4.368412017822266, - "learning_rate": 3.776056860977033e-05, - "loss": 0.6465, - "step": 83070 - }, - { - "epoch": 0.7344542866740925, - "grad_norm": 4.229257106781006, - "learning_rate": 3.7759095222098465e-05, - "loss": 0.6963, - "step": 83080 - }, - { - "epoch": 0.7345426899344047, - "grad_norm": 9.565871238708496, - "learning_rate": 3.7757621834426586e-05, - "loss": 0.7075, - "step": 83090 - }, - { - "epoch": 0.734631093194717, - "grad_norm": 1.0253034830093384, - "learning_rate": 3.775614844675472e-05, - "loss": 0.6859, - "step": 83100 - }, - { - "epoch": 0.7347194964550292, - "grad_norm": 3.467026948928833, - "learning_rate": 3.775467505908285e-05, - "loss": 0.6699, - "step": 83110 - }, - { - "epoch": 0.7348078997153416, - "grad_norm": 1.897066354751587, - "learning_rate": 3.775320167141098e-05, - "loss": 0.7024, - "step": 83120 - }, - { - "epoch": 0.7348963029756538, - "grad_norm": 8.542876243591309, - "learning_rate": 3.7751728283739107e-05, - "loss": 0.6004, - "step": 83130 - }, - { - "epoch": 0.734984706235966, - "grad_norm": 2.2165987491607666, - "learning_rate": 3.7750254896067235e-05, - "loss": 0.6797, - "step": 83140 - }, - { - "epoch": 0.7350731094962782, - "grad_norm": 5.751006126403809, - "learning_rate": 3.774878150839536e-05, - "loss": 0.6325, - "step": 83150 - }, - { - "epoch": 0.7351615127565905, - "grad_norm": 1.4556961059570312, - "learning_rate": 3.77473081207235e-05, - "loss": 0.6399, - "step": 83160 - }, - { - "epoch": 0.7352499160169027, - "grad_norm": 15.751131057739258, - "learning_rate": 3.774583473305162e-05, - "loss": 0.7067, - "step": 83170 - }, - { - "epoch": 0.7353383192772149, - "grad_norm": 2.450960397720337, - "learning_rate": 3.7744361345379755e-05, - "loss": 0.5672, - "step": 83180 - }, - { - "epoch": 0.7354267225375272, - "grad_norm": 2.299738645553589, - "learning_rate": 3.7742887957707883e-05, - "loss": 0.6697, - "step": 83190 - }, - { - "epoch": 0.7355151257978394, - "grad_norm": 3.7470109462738037, - "learning_rate": 3.774141457003601e-05, - "loss": 0.589, - "step": 83200 - }, - { - "epoch": 0.7356035290581516, - "grad_norm": 2.5312650203704834, - "learning_rate": 3.773994118236414e-05, - "loss": 0.6406, - "step": 83210 - }, - { - "epoch": 0.7356919323184639, - "grad_norm": 2.1556906700134277, - "learning_rate": 3.7738467794692275e-05, - "loss": 0.6681, - "step": 83220 - }, - { - "epoch": 0.7357803355787761, - "grad_norm": 2.699314594268799, - "learning_rate": 3.77369944070204e-05, - "loss": 0.6691, - "step": 83230 - }, - { - "epoch": 0.7358687388390884, - "grad_norm": 2.0234696865081787, - "learning_rate": 3.773552101934853e-05, - "loss": 0.5997, - "step": 83240 - }, - { - "epoch": 0.7359571420994007, - "grad_norm": 2.5400748252868652, - "learning_rate": 3.7734047631676654e-05, - "loss": 0.685, - "step": 83250 - }, - { - "epoch": 0.7360455453597129, - "grad_norm": 5.217487335205078, - "learning_rate": 3.773257424400479e-05, - "loss": 0.6832, - "step": 83260 - }, - { - "epoch": 0.7361339486200251, - "grad_norm": 0.9310530424118042, - "learning_rate": 3.773110085633292e-05, - "loss": 0.5563, - "step": 83270 - }, - { - "epoch": 0.7362223518803374, - "grad_norm": 2.381751537322998, - "learning_rate": 3.7729627468661045e-05, - "loss": 0.7837, - "step": 83280 - }, - { - "epoch": 0.7363107551406496, - "grad_norm": 2.303239583969116, - "learning_rate": 3.7728154080989174e-05, - "loss": 0.6513, - "step": 83290 - }, - { - "epoch": 0.7363991584009618, - "grad_norm": 7.5340094566345215, - "learning_rate": 3.772668069331731e-05, - "loss": 0.8153, - "step": 83300 - }, - { - "epoch": 0.736487561661274, - "grad_norm": 3.0403056144714355, - "learning_rate": 3.772520730564543e-05, - "loss": 0.6001, - "step": 83310 - }, - { - "epoch": 0.7365759649215863, - "grad_norm": 8.362848281860352, - "learning_rate": 3.7723733917973566e-05, - "loss": 0.8238, - "step": 83320 - }, - { - "epoch": 0.7366643681818985, - "grad_norm": 1.840634822845459, - "learning_rate": 3.7722260530301694e-05, - "loss": 0.6153, - "step": 83330 - }, - { - "epoch": 0.7367527714422107, - "grad_norm": 1.6935734748840332, - "learning_rate": 3.772078714262982e-05, - "loss": 0.709, - "step": 83340 - }, - { - "epoch": 0.7368411747025231, - "grad_norm": 5.5450825691223145, - "learning_rate": 3.771931375495795e-05, - "loss": 0.6664, - "step": 83350 - }, - { - "epoch": 0.7369295779628353, - "grad_norm": 4.621242046356201, - "learning_rate": 3.771784036728608e-05, - "loss": 0.6342, - "step": 83360 - }, - { - "epoch": 0.7370179812231475, - "grad_norm": 4.499111652374268, - "learning_rate": 3.771636697961421e-05, - "loss": 0.6453, - "step": 83370 - }, - { - "epoch": 0.7371063844834598, - "grad_norm": 3.9753365516662598, - "learning_rate": 3.771489359194234e-05, - "loss": 0.537, - "step": 83380 - }, - { - "epoch": 0.737194787743772, - "grad_norm": 1.1098240613937378, - "learning_rate": 3.7713420204270464e-05, - "loss": 0.5643, - "step": 83390 - }, - { - "epoch": 0.7372831910040842, - "grad_norm": 5.1562724113464355, - "learning_rate": 3.77119468165986e-05, - "loss": 0.7421, - "step": 83400 - }, - { - "epoch": 0.7373715942643965, - "grad_norm": 1.4846030473709106, - "learning_rate": 3.771047342892673e-05, - "loss": 0.7462, - "step": 83410 - }, - { - "epoch": 0.7374599975247087, - "grad_norm": 5.749865531921387, - "learning_rate": 3.7709000041254856e-05, - "loss": 0.5743, - "step": 83420 - }, - { - "epoch": 0.7375484007850209, - "grad_norm": 5.566555023193359, - "learning_rate": 3.7707526653582984e-05, - "loss": 0.622, - "step": 83430 - }, - { - "epoch": 0.7376368040453332, - "grad_norm": 2.281686544418335, - "learning_rate": 3.770605326591112e-05, - "loss": 0.6668, - "step": 83440 - }, - { - "epoch": 0.7377252073056454, - "grad_norm": 1.5201878547668457, - "learning_rate": 3.770457987823924e-05, - "loss": 0.8028, - "step": 83450 - }, - { - "epoch": 0.7378136105659576, - "grad_norm": 1.441184639930725, - "learning_rate": 3.7703106490567376e-05, - "loss": 0.7622, - "step": 83460 - }, - { - "epoch": 0.73790201382627, - "grad_norm": 1.495642900466919, - "learning_rate": 3.77016331028955e-05, - "loss": 0.6142, - "step": 83470 - }, - { - "epoch": 0.7379904170865822, - "grad_norm": 3.465707778930664, - "learning_rate": 3.770015971522363e-05, - "loss": 0.5996, - "step": 83480 - }, - { - "epoch": 0.7380788203468944, - "grad_norm": 5.066530227661133, - "learning_rate": 3.769868632755176e-05, - "loss": 0.7761, - "step": 83490 - }, - { - "epoch": 0.7381672236072067, - "grad_norm": 4.373834609985352, - "learning_rate": 3.769721293987989e-05, - "loss": 0.6554, - "step": 83500 - }, - { - "epoch": 0.7382556268675189, - "grad_norm": 1.6781747341156006, - "learning_rate": 3.769573955220802e-05, - "loss": 0.719, - "step": 83510 - }, - { - "epoch": 0.7383440301278311, - "grad_norm": 4.214022636413574, - "learning_rate": 3.769426616453615e-05, - "loss": 0.6745, - "step": 83520 - }, - { - "epoch": 0.7384324333881433, - "grad_norm": 5.292420387268066, - "learning_rate": 3.7692792776864275e-05, - "loss": 0.728, - "step": 83530 - }, - { - "epoch": 0.7385208366484556, - "grad_norm": 1.5029011964797974, - "learning_rate": 3.769131938919241e-05, - "loss": 0.8833, - "step": 83540 - }, - { - "epoch": 0.7386092399087678, - "grad_norm": 2.422543525695801, - "learning_rate": 3.768984600152054e-05, - "loss": 0.6862, - "step": 83550 - }, - { - "epoch": 0.73869764316908, - "grad_norm": 1.2955564260482788, - "learning_rate": 3.7688372613848666e-05, - "loss": 0.6666, - "step": 83560 - }, - { - "epoch": 0.7387860464293923, - "grad_norm": 1.1184724569320679, - "learning_rate": 3.7686899226176795e-05, - "loss": 0.5686, - "step": 83570 - }, - { - "epoch": 0.7388744496897045, - "grad_norm": 4.629207134246826, - "learning_rate": 3.768542583850493e-05, - "loss": 0.6956, - "step": 83580 - }, - { - "epoch": 0.7389628529500168, - "grad_norm": 7.006459712982178, - "learning_rate": 3.768395245083305e-05, - "loss": 0.6794, - "step": 83590 - }, - { - "epoch": 0.7390512562103291, - "grad_norm": 3.655062198638916, - "learning_rate": 3.7682479063161187e-05, - "loss": 0.7095, - "step": 83600 - }, - { - "epoch": 0.7391396594706413, - "grad_norm": 4.1207594871521, - "learning_rate": 3.768100567548931e-05, - "loss": 0.6169, - "step": 83610 - }, - { - "epoch": 0.7392280627309535, - "grad_norm": 1.8817453384399414, - "learning_rate": 3.767953228781744e-05, - "loss": 0.6754, - "step": 83620 - }, - { - "epoch": 0.7393164659912658, - "grad_norm": 4.477394104003906, - "learning_rate": 3.767805890014557e-05, - "loss": 0.7391, - "step": 83630 - }, - { - "epoch": 0.739404869251578, - "grad_norm": 1.1810672283172607, - "learning_rate": 3.76765855124737e-05, - "loss": 0.6219, - "step": 83640 - }, - { - "epoch": 0.7394932725118902, - "grad_norm": 2.279214859008789, - "learning_rate": 3.767511212480183e-05, - "loss": 0.7055, - "step": 83650 - }, - { - "epoch": 0.7395816757722025, - "grad_norm": 1.1570273637771606, - "learning_rate": 3.7673638737129963e-05, - "loss": 0.6998, - "step": 83660 - }, - { - "epoch": 0.7396700790325147, - "grad_norm": 4.0594801902771, - "learning_rate": 3.7672165349458085e-05, - "loss": 0.5268, - "step": 83670 - }, - { - "epoch": 0.7397584822928269, - "grad_norm": 2.7007806301116943, - "learning_rate": 3.767069196178622e-05, - "loss": 0.6917, - "step": 83680 - }, - { - "epoch": 0.7398468855531392, - "grad_norm": 5.185434818267822, - "learning_rate": 3.766921857411435e-05, - "loss": 0.5995, - "step": 83690 - }, - { - "epoch": 0.7399352888134514, - "grad_norm": 3.2521445751190186, - "learning_rate": 3.766774518644248e-05, - "loss": 0.6451, - "step": 83700 - }, - { - "epoch": 0.7400236920737637, - "grad_norm": 15.640610694885254, - "learning_rate": 3.7666271798770605e-05, - "loss": 0.6014, - "step": 83710 - }, - { - "epoch": 0.740112095334076, - "grad_norm": 2.6641318798065186, - "learning_rate": 3.7664798411098734e-05, - "loss": 0.7148, - "step": 83720 - }, - { - "epoch": 0.7402004985943882, - "grad_norm": 4.184748649597168, - "learning_rate": 3.766332502342686e-05, - "loss": 0.718, - "step": 83730 - }, - { - "epoch": 0.7402889018547004, - "grad_norm": 3.5810635089874268, - "learning_rate": 3.7661851635755e-05, - "loss": 0.6951, - "step": 83740 - }, - { - "epoch": 0.7403773051150127, - "grad_norm": 2.6039035320281982, - "learning_rate": 3.7660378248083125e-05, - "loss": 0.6446, - "step": 83750 - }, - { - "epoch": 0.7404657083753249, - "grad_norm": 1.8136022090911865, - "learning_rate": 3.7658904860411254e-05, - "loss": 0.7126, - "step": 83760 - }, - { - "epoch": 0.7405541116356371, - "grad_norm": 2.059373140335083, - "learning_rate": 3.765743147273938e-05, - "loss": 0.4997, - "step": 83770 - }, - { - "epoch": 0.7406425148959493, - "grad_norm": 2.658827066421509, - "learning_rate": 3.765595808506751e-05, - "loss": 0.6084, - "step": 83780 - }, - { - "epoch": 0.7407309181562616, - "grad_norm": 2.070920467376709, - "learning_rate": 3.765448469739564e-05, - "loss": 0.6939, - "step": 83790 - }, - { - "epoch": 0.7408193214165738, - "grad_norm": 6.838007926940918, - "learning_rate": 3.7653011309723774e-05, - "loss": 0.6527, - "step": 83800 - }, - { - "epoch": 0.740907724676886, - "grad_norm": 3.6335978507995605, - "learning_rate": 3.76515379220519e-05, - "loss": 0.6913, - "step": 83810 - }, - { - "epoch": 0.7409961279371984, - "grad_norm": 2.9283552169799805, - "learning_rate": 3.765006453438003e-05, - "loss": 0.6935, - "step": 83820 - }, - { - "epoch": 0.7410845311975106, - "grad_norm": 2.8865389823913574, - "learning_rate": 3.764859114670816e-05, - "loss": 0.8142, - "step": 83830 - }, - { - "epoch": 0.7411729344578228, - "grad_norm": 4.269875526428223, - "learning_rate": 3.764711775903629e-05, - "loss": 0.6683, - "step": 83840 - }, - { - "epoch": 0.7412613377181351, - "grad_norm": 2.1077256202697754, - "learning_rate": 3.7645644371364416e-05, - "loss": 0.7241, - "step": 83850 - }, - { - "epoch": 0.7413497409784473, - "grad_norm": 1.362311601638794, - "learning_rate": 3.7644170983692544e-05, - "loss": 0.6739, - "step": 83860 - }, - { - "epoch": 0.7414381442387595, - "grad_norm": 2.4714369773864746, - "learning_rate": 3.764269759602068e-05, - "loss": 0.5756, - "step": 83870 - }, - { - "epoch": 0.7415265474990718, - "grad_norm": 8.367737770080566, - "learning_rate": 3.764122420834881e-05, - "loss": 0.7419, - "step": 83880 - }, - { - "epoch": 0.741614950759384, - "grad_norm": 9.864267349243164, - "learning_rate": 3.7639750820676936e-05, - "loss": 0.6326, - "step": 83890 - }, - { - "epoch": 0.7417033540196962, - "grad_norm": 3.680027484893799, - "learning_rate": 3.7638277433005064e-05, - "loss": 0.7048, - "step": 83900 - }, - { - "epoch": 0.7417917572800085, - "grad_norm": 2.4032979011535645, - "learning_rate": 3.763680404533319e-05, - "loss": 0.7104, - "step": 83910 - }, - { - "epoch": 0.7418801605403207, - "grad_norm": 5.893245697021484, - "learning_rate": 3.763533065766132e-05, - "loss": 0.6404, - "step": 83920 - }, - { - "epoch": 0.7419685638006329, - "grad_norm": 2.3711273670196533, - "learning_rate": 3.7633857269989456e-05, - "loss": 0.6871, - "step": 83930 - }, - { - "epoch": 0.7420569670609453, - "grad_norm": 1.8997288942337036, - "learning_rate": 3.763238388231758e-05, - "loss": 0.6988, - "step": 83940 - }, - { - "epoch": 0.7421453703212575, - "grad_norm": 8.349346160888672, - "learning_rate": 3.763091049464571e-05, - "loss": 0.7116, - "step": 83950 - }, - { - "epoch": 0.7422337735815697, - "grad_norm": 5.230564594268799, - "learning_rate": 3.762943710697384e-05, - "loss": 0.6246, - "step": 83960 - }, - { - "epoch": 0.742322176841882, - "grad_norm": 2.8075129985809326, - "learning_rate": 3.762796371930197e-05, - "loss": 0.6132, - "step": 83970 - }, - { - "epoch": 0.7424105801021942, - "grad_norm": 3.2089080810546875, - "learning_rate": 3.76264903316301e-05, - "loss": 0.6156, - "step": 83980 - }, - { - "epoch": 0.7424989833625064, - "grad_norm": 3.1539509296417236, - "learning_rate": 3.762501694395823e-05, - "loss": 0.6312, - "step": 83990 - }, - { - "epoch": 0.7425873866228186, - "grad_norm": 2.6977450847625732, - "learning_rate": 3.7623543556286355e-05, - "loss": 0.6348, - "step": 84000 - }, - { - "epoch": 0.7426757898831309, - "grad_norm": 3.076150894165039, - "learning_rate": 3.762207016861449e-05, - "loss": 0.5621, - "step": 84010 - }, - { - "epoch": 0.7427641931434431, - "grad_norm": 3.1662416458129883, - "learning_rate": 3.762059678094262e-05, - "loss": 0.7502, - "step": 84020 - }, - { - "epoch": 0.7428525964037553, - "grad_norm": 3.0767221450805664, - "learning_rate": 3.7619123393270746e-05, - "loss": 0.79, - "step": 84030 - }, - { - "epoch": 0.7429409996640676, - "grad_norm": 6.01400899887085, - "learning_rate": 3.7617650005598875e-05, - "loss": 0.7268, - "step": 84040 - }, - { - "epoch": 0.7430294029243798, - "grad_norm": 3.0729949474334717, - "learning_rate": 3.761617661792701e-05, - "loss": 0.6725, - "step": 84050 - }, - { - "epoch": 0.7431178061846921, - "grad_norm": 3.168278455734253, - "learning_rate": 3.761470323025513e-05, - "loss": 0.6452, - "step": 84060 - }, - { - "epoch": 0.7432062094450044, - "grad_norm": 2.7801151275634766, - "learning_rate": 3.761322984258327e-05, - "loss": 0.6718, - "step": 84070 - }, - { - "epoch": 0.7432946127053166, - "grad_norm": 6.022086143493652, - "learning_rate": 3.761175645491139e-05, - "loss": 0.5809, - "step": 84080 - }, - { - "epoch": 0.7433830159656288, - "grad_norm": 2.4401657581329346, - "learning_rate": 3.761028306723952e-05, - "loss": 0.7353, - "step": 84090 - }, - { - "epoch": 0.7434714192259411, - "grad_norm": 4.503438472747803, - "learning_rate": 3.760880967956765e-05, - "loss": 0.666, - "step": 84100 - }, - { - "epoch": 0.7435598224862533, - "grad_norm": 2.0846970081329346, - "learning_rate": 3.760733629189578e-05, - "loss": 0.7087, - "step": 84110 - }, - { - "epoch": 0.7436482257465655, - "grad_norm": 3.1882002353668213, - "learning_rate": 3.760586290422391e-05, - "loss": 0.6863, - "step": 84120 - }, - { - "epoch": 0.7437366290068778, - "grad_norm": 5.416436672210693, - "learning_rate": 3.7604389516552044e-05, - "loss": 0.6927, - "step": 84130 - }, - { - "epoch": 0.74382503226719, - "grad_norm": 1.339212417602539, - "learning_rate": 3.7602916128880165e-05, - "loss": 0.6296, - "step": 84140 - }, - { - "epoch": 0.7439134355275022, - "grad_norm": 6.03978157043457, - "learning_rate": 3.76014427412083e-05, - "loss": 0.6019, - "step": 84150 - }, - { - "epoch": 0.7440018387878145, - "grad_norm": 7.817158222198486, - "learning_rate": 3.759996935353643e-05, - "loss": 0.7386, - "step": 84160 - }, - { - "epoch": 0.7440902420481267, - "grad_norm": 2.0685369968414307, - "learning_rate": 3.759849596586456e-05, - "loss": 0.6602, - "step": 84170 - }, - { - "epoch": 0.744178645308439, - "grad_norm": 1.6310970783233643, - "learning_rate": 3.7597022578192685e-05, - "loss": 0.5695, - "step": 84180 - }, - { - "epoch": 0.7442670485687513, - "grad_norm": 1.040309190750122, - "learning_rate": 3.7595549190520814e-05, - "loss": 0.7032, - "step": 84190 - }, - { - "epoch": 0.7443554518290635, - "grad_norm": 2.361133098602295, - "learning_rate": 3.759407580284894e-05, - "loss": 0.7757, - "step": 84200 - }, - { - "epoch": 0.7444438550893757, - "grad_norm": 4.239267349243164, - "learning_rate": 3.759260241517708e-05, - "loss": 0.6926, - "step": 84210 - }, - { - "epoch": 0.744532258349688, - "grad_norm": 2.7009713649749756, - "learning_rate": 3.75911290275052e-05, - "loss": 0.7644, - "step": 84220 - }, - { - "epoch": 0.7446206616100002, - "grad_norm": 3.142449140548706, - "learning_rate": 3.7589655639833334e-05, - "loss": 0.6601, - "step": 84230 - }, - { - "epoch": 0.7447090648703124, - "grad_norm": 2.860416889190674, - "learning_rate": 3.758818225216146e-05, - "loss": 0.5282, - "step": 84240 - }, - { - "epoch": 0.7447974681306246, - "grad_norm": 1.2303674221038818, - "learning_rate": 3.758670886448959e-05, - "loss": 0.6776, - "step": 84250 - }, - { - "epoch": 0.7448858713909369, - "grad_norm": 5.118793487548828, - "learning_rate": 3.758523547681772e-05, - "loss": 0.7907, - "step": 84260 - }, - { - "epoch": 0.7449742746512491, - "grad_norm": 2.992825746536255, - "learning_rate": 3.7583762089145854e-05, - "loss": 0.8036, - "step": 84270 - }, - { - "epoch": 0.7450626779115613, - "grad_norm": 2.319847583770752, - "learning_rate": 3.7582288701473976e-05, - "loss": 0.6061, - "step": 84280 - }, - { - "epoch": 0.7451510811718736, - "grad_norm": 15.495037078857422, - "learning_rate": 3.758081531380211e-05, - "loss": 0.6218, - "step": 84290 - }, - { - "epoch": 0.7452394844321859, - "grad_norm": 3.1386735439300537, - "learning_rate": 3.757934192613023e-05, - "loss": 0.6729, - "step": 84300 - }, - { - "epoch": 0.7453278876924981, - "grad_norm": 1.256740689277649, - "learning_rate": 3.757786853845837e-05, - "loss": 0.5731, - "step": 84310 - }, - { - "epoch": 0.7454162909528104, - "grad_norm": 1.222428798675537, - "learning_rate": 3.7576395150786496e-05, - "loss": 0.4752, - "step": 84320 - }, - { - "epoch": 0.7455046942131226, - "grad_norm": 8.0045804977417, - "learning_rate": 3.7574921763114624e-05, - "loss": 0.6779, - "step": 84330 - }, - { - "epoch": 0.7455930974734348, - "grad_norm": 5.968054294586182, - "learning_rate": 3.757344837544275e-05, - "loss": 0.678, - "step": 84340 - }, - { - "epoch": 0.7456815007337471, - "grad_norm": 4.230433940887451, - "learning_rate": 3.757197498777089e-05, - "loss": 0.7098, - "step": 84350 - }, - { - "epoch": 0.7457699039940593, - "grad_norm": 2.3833985328674316, - "learning_rate": 3.757050160009901e-05, - "loss": 0.7, - "step": 84360 - }, - { - "epoch": 0.7458583072543715, - "grad_norm": 2.216752052307129, - "learning_rate": 3.7569028212427144e-05, - "loss": 0.5883, - "step": 84370 - }, - { - "epoch": 0.7459467105146838, - "grad_norm": 2.3393094539642334, - "learning_rate": 3.756755482475527e-05, - "loss": 0.7256, - "step": 84380 - }, - { - "epoch": 0.746035113774996, - "grad_norm": 1.5323232412338257, - "learning_rate": 3.75660814370834e-05, - "loss": 0.685, - "step": 84390 - }, - { - "epoch": 0.7461235170353082, - "grad_norm": 10.415721893310547, - "learning_rate": 3.756460804941153e-05, - "loss": 0.631, - "step": 84400 - }, - { - "epoch": 0.7462119202956206, - "grad_norm": 2.981957197189331, - "learning_rate": 3.756313466173966e-05, - "loss": 0.6316, - "step": 84410 - }, - { - "epoch": 0.7463003235559328, - "grad_norm": 5.538896083831787, - "learning_rate": 3.7561661274067786e-05, - "loss": 0.578, - "step": 84420 - }, - { - "epoch": 0.746388726816245, - "grad_norm": 7.855510234832764, - "learning_rate": 3.756018788639592e-05, - "loss": 0.7256, - "step": 84430 - }, - { - "epoch": 0.7464771300765572, - "grad_norm": 6.738065719604492, - "learning_rate": 3.755871449872404e-05, - "loss": 0.4829, - "step": 84440 - }, - { - "epoch": 0.7465655333368695, - "grad_norm": 6.32013463973999, - "learning_rate": 3.755724111105218e-05, - "loss": 0.6204, - "step": 84450 - }, - { - "epoch": 0.7466539365971817, - "grad_norm": 3.561615228652954, - "learning_rate": 3.7555767723380306e-05, - "loss": 0.6177, - "step": 84460 - }, - { - "epoch": 0.7467423398574939, - "grad_norm": 2.4124794006347656, - "learning_rate": 3.7554294335708435e-05, - "loss": 0.5233, - "step": 84470 - }, - { - "epoch": 0.7468307431178062, - "grad_norm": 1.7479076385498047, - "learning_rate": 3.755282094803656e-05, - "loss": 0.7237, - "step": 84480 - }, - { - "epoch": 0.7469191463781184, - "grad_norm": 3.5975189208984375, - "learning_rate": 3.75513475603647e-05, - "loss": 0.6279, - "step": 84490 - }, - { - "epoch": 0.7470075496384306, - "grad_norm": 5.031615734100342, - "learning_rate": 3.754987417269282e-05, - "loss": 0.7757, - "step": 84500 - }, - { - "epoch": 0.7470959528987429, - "grad_norm": 3.85677170753479, - "learning_rate": 3.7548400785020955e-05, - "loss": 0.5713, - "step": 84510 - }, - { - "epoch": 0.7471843561590551, - "grad_norm": 8.631434440612793, - "learning_rate": 3.754692739734908e-05, - "loss": 0.6599, - "step": 84520 - }, - { - "epoch": 0.7472727594193674, - "grad_norm": 1.915344476699829, - "learning_rate": 3.754545400967721e-05, - "loss": 0.5745, - "step": 84530 - }, - { - "epoch": 0.7473611626796797, - "grad_norm": 7.097794055938721, - "learning_rate": 3.754398062200534e-05, - "loss": 0.5727, - "step": 84540 - }, - { - "epoch": 0.7474495659399919, - "grad_norm": 1.8751381635665894, - "learning_rate": 3.754250723433347e-05, - "loss": 0.584, - "step": 84550 - }, - { - "epoch": 0.7475379692003041, - "grad_norm": 1.7229143381118774, - "learning_rate": 3.75410338466616e-05, - "loss": 0.5988, - "step": 84560 - }, - { - "epoch": 0.7476263724606164, - "grad_norm": 2.0988352298736572, - "learning_rate": 3.753956045898973e-05, - "loss": 0.6226, - "step": 84570 - }, - { - "epoch": 0.7477147757209286, - "grad_norm": 7.097356796264648, - "learning_rate": 3.753808707131785e-05, - "loss": 0.5733, - "step": 84580 - }, - { - "epoch": 0.7478031789812408, - "grad_norm": 1.6344846487045288, - "learning_rate": 3.753661368364599e-05, - "loss": 0.6081, - "step": 84590 - }, - { - "epoch": 0.747891582241553, - "grad_norm": 3.0487451553344727, - "learning_rate": 3.753514029597412e-05, - "loss": 0.5739, - "step": 84600 - }, - { - "epoch": 0.7479799855018653, - "grad_norm": 1.8894730806350708, - "learning_rate": 3.7533666908302245e-05, - "loss": 0.6757, - "step": 84610 - }, - { - "epoch": 0.7480683887621775, - "grad_norm": 2.5171899795532227, - "learning_rate": 3.7532193520630374e-05, - "loss": 0.8411, - "step": 84620 - }, - { - "epoch": 0.7481567920224897, - "grad_norm": 4.379422664642334, - "learning_rate": 3.753072013295851e-05, - "loss": 0.6768, - "step": 84630 - }, - { - "epoch": 0.748245195282802, - "grad_norm": 8.756941795349121, - "learning_rate": 3.752924674528664e-05, - "loss": 0.8038, - "step": 84640 - }, - { - "epoch": 0.7483335985431143, - "grad_norm": 11.941573143005371, - "learning_rate": 3.7527773357614765e-05, - "loss": 0.6889, - "step": 84650 - }, - { - "epoch": 0.7484220018034266, - "grad_norm": 2.0575923919677734, - "learning_rate": 3.7526299969942894e-05, - "loss": 0.6922, - "step": 84660 - }, - { - "epoch": 0.7485104050637388, - "grad_norm": 3.003082513809204, - "learning_rate": 3.752482658227102e-05, - "loss": 0.5843, - "step": 84670 - }, - { - "epoch": 0.748598808324051, - "grad_norm": 2.6774916648864746, - "learning_rate": 3.752335319459915e-05, - "loss": 0.7793, - "step": 84680 - }, - { - "epoch": 0.7486872115843632, - "grad_norm": 4.0084099769592285, - "learning_rate": 3.752187980692728e-05, - "loss": 0.6427, - "step": 84690 - }, - { - "epoch": 0.7487756148446755, - "grad_norm": 3.386981725692749, - "learning_rate": 3.7520406419255414e-05, - "loss": 0.5562, - "step": 84700 - }, - { - "epoch": 0.7488640181049877, - "grad_norm": 1.8329048156738281, - "learning_rate": 3.751893303158354e-05, - "loss": 0.6764, - "step": 84710 - }, - { - "epoch": 0.7489524213652999, - "grad_norm": 1.757285237312317, - "learning_rate": 3.751745964391167e-05, - "loss": 0.6155, - "step": 84720 - }, - { - "epoch": 0.7490408246256122, - "grad_norm": 1.4587023258209229, - "learning_rate": 3.75159862562398e-05, - "loss": 0.6608, - "step": 84730 - }, - { - "epoch": 0.7491292278859244, - "grad_norm": 8.046749114990234, - "learning_rate": 3.751451286856793e-05, - "loss": 0.5967, - "step": 84740 - }, - { - "epoch": 0.7492176311462366, - "grad_norm": 4.4256062507629395, - "learning_rate": 3.7513039480896056e-05, - "loss": 0.6597, - "step": 84750 - }, - { - "epoch": 0.7493060344065489, - "grad_norm": 5.429772853851318, - "learning_rate": 3.751156609322419e-05, - "loss": 0.6772, - "step": 84760 - }, - { - "epoch": 0.7493944376668612, - "grad_norm": 1.6209697723388672, - "learning_rate": 3.751009270555231e-05, - "loss": 0.5931, - "step": 84770 - }, - { - "epoch": 0.7494828409271734, - "grad_norm": 3.219609022140503, - "learning_rate": 3.750861931788045e-05, - "loss": 0.6778, - "step": 84780 - }, - { - "epoch": 0.7495712441874857, - "grad_norm": 3.7768449783325195, - "learning_rate": 3.7507145930208576e-05, - "loss": 0.5966, - "step": 84790 - }, - { - "epoch": 0.7496596474477979, - "grad_norm": 1.5925281047821045, - "learning_rate": 3.7505672542536704e-05, - "loss": 0.6377, - "step": 84800 - }, - { - "epoch": 0.7497480507081101, - "grad_norm": 1.883607029914856, - "learning_rate": 3.750419915486483e-05, - "loss": 0.8659, - "step": 84810 - }, - { - "epoch": 0.7498364539684224, - "grad_norm": 5.472825050354004, - "learning_rate": 3.750272576719297e-05, - "loss": 0.684, - "step": 84820 - }, - { - "epoch": 0.7499248572287346, - "grad_norm": 2.8847320079803467, - "learning_rate": 3.750125237952109e-05, - "loss": 0.6232, - "step": 84830 - }, - { - "epoch": 0.7500132604890468, - "grad_norm": 5.925868034362793, - "learning_rate": 3.7499778991849224e-05, - "loss": 0.657, - "step": 84840 - }, - { - "epoch": 0.750101663749359, - "grad_norm": 6.294375419616699, - "learning_rate": 3.749830560417735e-05, - "loss": 0.6016, - "step": 84850 - }, - { - "epoch": 0.7501900670096713, - "grad_norm": 3.546903610229492, - "learning_rate": 3.749683221650548e-05, - "loss": 0.6389, - "step": 84860 - }, - { - "epoch": 0.7502784702699835, - "grad_norm": 6.391054153442383, - "learning_rate": 3.749535882883361e-05, - "loss": 0.6071, - "step": 84870 - }, - { - "epoch": 0.7503668735302959, - "grad_norm": 4.3632636070251465, - "learning_rate": 3.749388544116174e-05, - "loss": 0.7108, - "step": 84880 - }, - { - "epoch": 0.7504552767906081, - "grad_norm": 3.853543996810913, - "learning_rate": 3.7492412053489866e-05, - "loss": 0.7068, - "step": 84890 - }, - { - "epoch": 0.7505436800509203, - "grad_norm": 1.2865716218948364, - "learning_rate": 3.7490938665818e-05, - "loss": 0.6535, - "step": 84900 - }, - { - "epoch": 0.7506320833112325, - "grad_norm": 3.5363283157348633, - "learning_rate": 3.748946527814612e-05, - "loss": 0.783, - "step": 84910 - }, - { - "epoch": 0.7507204865715448, - "grad_norm": 4.21813440322876, - "learning_rate": 3.748799189047426e-05, - "loss": 0.5929, - "step": 84920 - }, - { - "epoch": 0.750808889831857, - "grad_norm": 3.862945079803467, - "learning_rate": 3.7486518502802386e-05, - "loss": 0.7237, - "step": 84930 - }, - { - "epoch": 0.7508972930921692, - "grad_norm": 1.0606372356414795, - "learning_rate": 3.7485045115130515e-05, - "loss": 0.6162, - "step": 84940 - }, - { - "epoch": 0.7509856963524815, - "grad_norm": 1.2253813743591309, - "learning_rate": 3.748357172745864e-05, - "loss": 0.5597, - "step": 84950 - }, - { - "epoch": 0.7510740996127937, - "grad_norm": 4.704916954040527, - "learning_rate": 3.748209833978678e-05, - "loss": 0.588, - "step": 84960 - }, - { - "epoch": 0.7511625028731059, - "grad_norm": 17.943359375, - "learning_rate": 3.74806249521149e-05, - "loss": 0.785, - "step": 84970 - }, - { - "epoch": 0.7512509061334182, - "grad_norm": 6.664664268493652, - "learning_rate": 3.7479151564443035e-05, - "loss": 0.669, - "step": 84980 - }, - { - "epoch": 0.7513393093937304, - "grad_norm": 2.415360689163208, - "learning_rate": 3.747767817677116e-05, - "loss": 0.6002, - "step": 84990 - }, - { - "epoch": 0.7514277126540427, - "grad_norm": 6.830345153808594, - "learning_rate": 3.747620478909929e-05, - "loss": 0.6042, - "step": 85000 - }, - { - "epoch": 0.751516115914355, - "grad_norm": 1.9554429054260254, - "learning_rate": 3.747473140142742e-05, - "loss": 0.6581, - "step": 85010 - }, - { - "epoch": 0.7516045191746672, - "grad_norm": 3.2804136276245117, - "learning_rate": 3.747325801375555e-05, - "loss": 0.6722, - "step": 85020 - }, - { - "epoch": 0.7516929224349794, - "grad_norm": 3.311786651611328, - "learning_rate": 3.747178462608368e-05, - "loss": 0.7336, - "step": 85030 - }, - { - "epoch": 0.7517813256952917, - "grad_norm": 5.547338008880615, - "learning_rate": 3.747031123841181e-05, - "loss": 0.7348, - "step": 85040 - }, - { - "epoch": 0.7518697289556039, - "grad_norm": 2.4761946201324463, - "learning_rate": 3.7468837850739933e-05, - "loss": 0.7087, - "step": 85050 - }, - { - "epoch": 0.7519581322159161, - "grad_norm": 6.19984769821167, - "learning_rate": 3.746736446306807e-05, - "loss": 0.6201, - "step": 85060 - }, - { - "epoch": 0.7520465354762284, - "grad_norm": 2.218047857284546, - "learning_rate": 3.74658910753962e-05, - "loss": 0.5292, - "step": 85070 - }, - { - "epoch": 0.7521349387365406, - "grad_norm": 3.5358660221099854, - "learning_rate": 3.7464417687724325e-05, - "loss": 0.7117, - "step": 85080 - }, - { - "epoch": 0.7522233419968528, - "grad_norm": 1.6040410995483398, - "learning_rate": 3.7462944300052454e-05, - "loss": 0.6922, - "step": 85090 - }, - { - "epoch": 0.752311745257165, - "grad_norm": 1.895133376121521, - "learning_rate": 3.746147091238059e-05, - "loss": 0.6707, - "step": 85100 - }, - { - "epoch": 0.7524001485174773, - "grad_norm": 5.289239883422852, - "learning_rate": 3.745999752470871e-05, - "loss": 0.6576, - "step": 85110 - }, - { - "epoch": 0.7524885517777896, - "grad_norm": 2.582613468170166, - "learning_rate": 3.7458524137036845e-05, - "loss": 0.6006, - "step": 85120 - }, - { - "epoch": 0.7525769550381018, - "grad_norm": 15.104894638061523, - "learning_rate": 3.745705074936497e-05, - "loss": 0.6407, - "step": 85130 - }, - { - "epoch": 0.7526653582984141, - "grad_norm": 10.050370216369629, - "learning_rate": 3.74555773616931e-05, - "loss": 0.5614, - "step": 85140 - }, - { - "epoch": 0.7527537615587263, - "grad_norm": 3.571335554122925, - "learning_rate": 3.745410397402123e-05, - "loss": 0.526, - "step": 85150 - }, - { - "epoch": 0.7528421648190385, - "grad_norm": 3.719113826751709, - "learning_rate": 3.745263058634936e-05, - "loss": 0.7387, - "step": 85160 - }, - { - "epoch": 0.7529305680793508, - "grad_norm": 3.958775520324707, - "learning_rate": 3.745115719867749e-05, - "loss": 0.5996, - "step": 85170 - }, - { - "epoch": 0.753018971339663, - "grad_norm": 2.792614221572876, - "learning_rate": 3.744968381100562e-05, - "loss": 0.5216, - "step": 85180 - }, - { - "epoch": 0.7531073745999752, - "grad_norm": 9.038586616516113, - "learning_rate": 3.7448210423333744e-05, - "loss": 0.6776, - "step": 85190 - }, - { - "epoch": 0.7531957778602875, - "grad_norm": 10.211748123168945, - "learning_rate": 3.744673703566188e-05, - "loss": 0.6809, - "step": 85200 - }, - { - "epoch": 0.7532841811205997, - "grad_norm": 13.097444534301758, - "learning_rate": 3.744526364799001e-05, - "loss": 0.5742, - "step": 85210 - }, - { - "epoch": 0.7533725843809119, - "grad_norm": 4.21989107131958, - "learning_rate": 3.7443790260318136e-05, - "loss": 0.6275, - "step": 85220 - }, - { - "epoch": 0.7534609876412242, - "grad_norm": 1.35722815990448, - "learning_rate": 3.7442316872646264e-05, - "loss": 0.7118, - "step": 85230 - }, - { - "epoch": 0.7535493909015365, - "grad_norm": 1.5782872438430786, - "learning_rate": 3.744084348497439e-05, - "loss": 0.641, - "step": 85240 - }, - { - "epoch": 0.7536377941618487, - "grad_norm": 1.3326246738433838, - "learning_rate": 3.743937009730252e-05, - "loss": 0.6758, - "step": 85250 - }, - { - "epoch": 0.753726197422161, - "grad_norm": 8.397066116333008, - "learning_rate": 3.7437896709630656e-05, - "loss": 0.7465, - "step": 85260 - }, - { - "epoch": 0.7538146006824732, - "grad_norm": 5.0338969230651855, - "learning_rate": 3.743642332195878e-05, - "loss": 0.7145, - "step": 85270 - }, - { - "epoch": 0.7539030039427854, - "grad_norm": 4.390265941619873, - "learning_rate": 3.743494993428691e-05, - "loss": 0.6464, - "step": 85280 - }, - { - "epoch": 0.7539914072030977, - "grad_norm": 2.3420324325561523, - "learning_rate": 3.743347654661504e-05, - "loss": 0.7682, - "step": 85290 - }, - { - "epoch": 0.7540798104634099, - "grad_norm": 3.8715598583221436, - "learning_rate": 3.743200315894317e-05, - "loss": 0.6269, - "step": 85300 - }, - { - "epoch": 0.7541682137237221, - "grad_norm": 1.51862633228302, - "learning_rate": 3.74305297712713e-05, - "loss": 0.6234, - "step": 85310 - }, - { - "epoch": 0.7542566169840343, - "grad_norm": 7.025180816650391, - "learning_rate": 3.742905638359943e-05, - "loss": 0.5331, - "step": 85320 - }, - { - "epoch": 0.7543450202443466, - "grad_norm": 6.612129211425781, - "learning_rate": 3.7427582995927554e-05, - "loss": 0.6728, - "step": 85330 - }, - { - "epoch": 0.7544334235046588, - "grad_norm": 4.373544692993164, - "learning_rate": 3.742610960825569e-05, - "loss": 0.8271, - "step": 85340 - }, - { - "epoch": 0.754521826764971, - "grad_norm": 1.9424684047698975, - "learning_rate": 3.742463622058382e-05, - "loss": 0.901, - "step": 85350 - }, - { - "epoch": 0.7546102300252834, - "grad_norm": 2.035431146621704, - "learning_rate": 3.7423162832911946e-05, - "loss": 0.5488, - "step": 85360 - }, - { - "epoch": 0.7546986332855956, - "grad_norm": 2.8291876316070557, - "learning_rate": 3.7421689445240075e-05, - "loss": 0.7324, - "step": 85370 - }, - { - "epoch": 0.7547870365459078, - "grad_norm": 3.273371458053589, - "learning_rate": 3.74202160575682e-05, - "loss": 0.6691, - "step": 85380 - }, - { - "epoch": 0.7548754398062201, - "grad_norm": 3.8757667541503906, - "learning_rate": 3.741874266989633e-05, - "loss": 0.5976, - "step": 85390 - }, - { - "epoch": 0.7549638430665323, - "grad_norm": 2.590461015701294, - "learning_rate": 3.7417269282224466e-05, - "loss": 0.6462, - "step": 85400 - }, - { - "epoch": 0.7550522463268445, - "grad_norm": 2.8991386890411377, - "learning_rate": 3.741579589455259e-05, - "loss": 0.7577, - "step": 85410 - }, - { - "epoch": 0.7551406495871568, - "grad_norm": 6.056429862976074, - "learning_rate": 3.741432250688072e-05, - "loss": 0.7301, - "step": 85420 - }, - { - "epoch": 0.755229052847469, - "grad_norm": 1.267293095588684, - "learning_rate": 3.741284911920885e-05, - "loss": 0.6723, - "step": 85430 - }, - { - "epoch": 0.7553174561077812, - "grad_norm": 1.804534673690796, - "learning_rate": 3.741137573153698e-05, - "loss": 0.5866, - "step": 85440 - }, - { - "epoch": 0.7554058593680935, - "grad_norm": 5.3751726150512695, - "learning_rate": 3.740990234386511e-05, - "loss": 0.6297, - "step": 85450 - }, - { - "epoch": 0.7554942626284057, - "grad_norm": 1.5884987115859985, - "learning_rate": 3.740842895619324e-05, - "loss": 0.6622, - "step": 85460 - }, - { - "epoch": 0.755582665888718, - "grad_norm": 1.7796268463134766, - "learning_rate": 3.7406955568521365e-05, - "loss": 0.6744, - "step": 85470 - }, - { - "epoch": 0.7556710691490303, - "grad_norm": 2.3204240798950195, - "learning_rate": 3.74054821808495e-05, - "loss": 0.6614, - "step": 85480 - }, - { - "epoch": 0.7557594724093425, - "grad_norm": 11.465657234191895, - "learning_rate": 3.740400879317763e-05, - "loss": 0.69, - "step": 85490 - }, - { - "epoch": 0.7558478756696547, - "grad_norm": 5.745561599731445, - "learning_rate": 3.740253540550576e-05, - "loss": 0.7496, - "step": 85500 - }, - { - "epoch": 0.755936278929967, - "grad_norm": 4.5895867347717285, - "learning_rate": 3.7401062017833885e-05, - "loss": 0.5997, - "step": 85510 - }, - { - "epoch": 0.7560246821902792, - "grad_norm": 8.398486137390137, - "learning_rate": 3.7399588630162013e-05, - "loss": 0.7039, - "step": 85520 - }, - { - "epoch": 0.7561130854505914, - "grad_norm": 2.9003798961639404, - "learning_rate": 3.739811524249014e-05, - "loss": 0.5834, - "step": 85530 - }, - { - "epoch": 0.7562014887109036, - "grad_norm": 4.476251602172852, - "learning_rate": 3.739664185481828e-05, - "loss": 0.5525, - "step": 85540 - }, - { - "epoch": 0.7562898919712159, - "grad_norm": 2.4741172790527344, - "learning_rate": 3.7395168467146405e-05, - "loss": 0.6641, - "step": 85550 - }, - { - "epoch": 0.7563782952315281, - "grad_norm": 3.6845388412475586, - "learning_rate": 3.7393695079474534e-05, - "loss": 0.654, - "step": 85560 - }, - { - "epoch": 0.7564666984918403, - "grad_norm": 4.938345909118652, - "learning_rate": 3.739222169180266e-05, - "loss": 0.5988, - "step": 85570 - }, - { - "epoch": 0.7565551017521526, - "grad_norm": 4.465521812438965, - "learning_rate": 3.739074830413079e-05, - "loss": 0.7493, - "step": 85580 - }, - { - "epoch": 0.7566435050124649, - "grad_norm": 2.490525722503662, - "learning_rate": 3.738927491645892e-05, - "loss": 0.6856, - "step": 85590 - }, - { - "epoch": 0.7567319082727771, - "grad_norm": 10.04533863067627, - "learning_rate": 3.738780152878705e-05, - "loss": 0.6438, - "step": 85600 - }, - { - "epoch": 0.7568203115330894, - "grad_norm": 2.158148765563965, - "learning_rate": 3.738632814111518e-05, - "loss": 0.7951, - "step": 85610 - }, - { - "epoch": 0.7569087147934016, - "grad_norm": 4.396143913269043, - "learning_rate": 3.738485475344331e-05, - "loss": 0.5549, - "step": 85620 - }, - { - "epoch": 0.7569971180537138, - "grad_norm": 2.749972343444824, - "learning_rate": 3.738338136577144e-05, - "loss": 0.6336, - "step": 85630 - }, - { - "epoch": 0.7570855213140261, - "grad_norm": 1.941791296005249, - "learning_rate": 3.738190797809957e-05, - "loss": 0.7297, - "step": 85640 - }, - { - "epoch": 0.7571739245743383, - "grad_norm": 2.1761698722839355, - "learning_rate": 3.7380434590427696e-05, - "loss": 0.782, - "step": 85650 - }, - { - "epoch": 0.7572623278346505, - "grad_norm": 2.907989740371704, - "learning_rate": 3.7378961202755824e-05, - "loss": 0.8016, - "step": 85660 - }, - { - "epoch": 0.7573507310949628, - "grad_norm": 13.42611026763916, - "learning_rate": 3.737748781508396e-05, - "loss": 0.6224, - "step": 85670 - }, - { - "epoch": 0.757439134355275, - "grad_norm": 2.420255661010742, - "learning_rate": 3.737601442741209e-05, - "loss": 0.6448, - "step": 85680 - }, - { - "epoch": 0.7575275376155872, - "grad_norm": 1.5082685947418213, - "learning_rate": 3.7374541039740216e-05, - "loss": 0.6928, - "step": 85690 - }, - { - "epoch": 0.7576159408758995, - "grad_norm": 7.950104236602783, - "learning_rate": 3.7373067652068344e-05, - "loss": 0.5604, - "step": 85700 - }, - { - "epoch": 0.7577043441362118, - "grad_norm": 3.9872817993164062, - "learning_rate": 3.737159426439647e-05, - "loss": 0.6927, - "step": 85710 - }, - { - "epoch": 0.757792747396524, - "grad_norm": 3.4432690143585205, - "learning_rate": 3.73701208767246e-05, - "loss": 0.6204, - "step": 85720 - }, - { - "epoch": 0.7578811506568363, - "grad_norm": 5.041614055633545, - "learning_rate": 3.7368647489052736e-05, - "loss": 0.7157, - "step": 85730 - }, - { - "epoch": 0.7579695539171485, - "grad_norm": 5.1985321044921875, - "learning_rate": 3.736717410138086e-05, - "loss": 0.7084, - "step": 85740 - }, - { - "epoch": 0.7580579571774607, - "grad_norm": 9.321937561035156, - "learning_rate": 3.736570071370899e-05, - "loss": 0.7561, - "step": 85750 - }, - { - "epoch": 0.758146360437773, - "grad_norm": 1.8656742572784424, - "learning_rate": 3.736422732603712e-05, - "loss": 0.6805, - "step": 85760 - }, - { - "epoch": 0.7582347636980852, - "grad_norm": 2.554158926010132, - "learning_rate": 3.736275393836525e-05, - "loss": 0.6077, - "step": 85770 - }, - { - "epoch": 0.7583231669583974, - "grad_norm": 2.7132935523986816, - "learning_rate": 3.736128055069338e-05, - "loss": 0.7703, - "step": 85780 - }, - { - "epoch": 0.7584115702187096, - "grad_norm": 7.1925740242004395, - "learning_rate": 3.735980716302151e-05, - "loss": 0.5611, - "step": 85790 - }, - { - "epoch": 0.7584999734790219, - "grad_norm": 1.8949414491653442, - "learning_rate": 3.7358333775349635e-05, - "loss": 0.7039, - "step": 85800 - }, - { - "epoch": 0.7585883767393341, - "grad_norm": 0.8182306885719299, - "learning_rate": 3.735686038767777e-05, - "loss": 0.5058, - "step": 85810 - }, - { - "epoch": 0.7586767799996463, - "grad_norm": 4.480530261993408, - "learning_rate": 3.73553870000059e-05, - "loss": 0.5975, - "step": 85820 - }, - { - "epoch": 0.7587651832599587, - "grad_norm": 2.9453999996185303, - "learning_rate": 3.7353913612334026e-05, - "loss": 0.8073, - "step": 85830 - }, - { - "epoch": 0.7588535865202709, - "grad_norm": 8.90038013458252, - "learning_rate": 3.7352440224662155e-05, - "loss": 0.6849, - "step": 85840 - }, - { - "epoch": 0.7589419897805831, - "grad_norm": 2.3736259937286377, - "learning_rate": 3.735096683699028e-05, - "loss": 0.6218, - "step": 85850 - }, - { - "epoch": 0.7590303930408954, - "grad_norm": 2.652662515640259, - "learning_rate": 3.734949344931841e-05, - "loss": 0.6121, - "step": 85860 - }, - { - "epoch": 0.7591187963012076, - "grad_norm": 1.1810376644134521, - "learning_rate": 3.7348020061646547e-05, - "loss": 0.8128, - "step": 85870 - }, - { - "epoch": 0.7592071995615198, - "grad_norm": 1.275516152381897, - "learning_rate": 3.734654667397467e-05, - "loss": 0.6507, - "step": 85880 - }, - { - "epoch": 0.7592956028218321, - "grad_norm": 1.1071529388427734, - "learning_rate": 3.73450732863028e-05, - "loss": 0.5529, - "step": 85890 - }, - { - "epoch": 0.7593840060821443, - "grad_norm": 2.1914048194885254, - "learning_rate": 3.734359989863093e-05, - "loss": 0.661, - "step": 85900 - }, - { - "epoch": 0.7594724093424565, - "grad_norm": 2.3267629146575928, - "learning_rate": 3.734212651095906e-05, - "loss": 0.8045, - "step": 85910 - }, - { - "epoch": 0.7595608126027688, - "grad_norm": 2.9179482460021973, - "learning_rate": 3.734065312328719e-05, - "loss": 0.6407, - "step": 85920 - }, - { - "epoch": 0.759649215863081, - "grad_norm": 6.203660488128662, - "learning_rate": 3.7339179735615323e-05, - "loss": 0.7239, - "step": 85930 - }, - { - "epoch": 0.7597376191233933, - "grad_norm": 1.300817608833313, - "learning_rate": 3.7337706347943445e-05, - "loss": 0.6637, - "step": 85940 - }, - { - "epoch": 0.7598260223837056, - "grad_norm": 11.422124862670898, - "learning_rate": 3.733623296027158e-05, - "loss": 0.53, - "step": 85950 - }, - { - "epoch": 0.7599144256440178, - "grad_norm": 1.13628351688385, - "learning_rate": 3.73347595725997e-05, - "loss": 0.6933, - "step": 85960 - }, - { - "epoch": 0.76000282890433, - "grad_norm": 9.199313163757324, - "learning_rate": 3.733328618492784e-05, - "loss": 0.5772, - "step": 85970 - }, - { - "epoch": 0.7600912321646422, - "grad_norm": 1.7413986921310425, - "learning_rate": 3.7331812797255965e-05, - "loss": 0.5428, - "step": 85980 - }, - { - "epoch": 0.7601796354249545, - "grad_norm": 1.0740153789520264, - "learning_rate": 3.7330339409584094e-05, - "loss": 0.6409, - "step": 85990 - }, - { - "epoch": 0.7602680386852667, - "grad_norm": 1.8425790071487427, - "learning_rate": 3.732886602191222e-05, - "loss": 0.6859, - "step": 86000 - }, - { - "epoch": 0.7603564419455789, - "grad_norm": 7.294825553894043, - "learning_rate": 3.732739263424036e-05, - "loss": 0.706, - "step": 86010 - }, - { - "epoch": 0.7604448452058912, - "grad_norm": 5.672529697418213, - "learning_rate": 3.732591924656848e-05, - "loss": 0.6576, - "step": 86020 - }, - { - "epoch": 0.7605332484662034, - "grad_norm": 2.1200952529907227, - "learning_rate": 3.7324445858896614e-05, - "loss": 0.6754, - "step": 86030 - }, - { - "epoch": 0.7606216517265156, - "grad_norm": 1.5939382314682007, - "learning_rate": 3.732297247122474e-05, - "loss": 0.588, - "step": 86040 - }, - { - "epoch": 0.7607100549868279, - "grad_norm": 3.9748353958129883, - "learning_rate": 3.732149908355287e-05, - "loss": 0.7642, - "step": 86050 - }, - { - "epoch": 0.7607984582471402, - "grad_norm": 1.7719999551773071, - "learning_rate": 3.7320025695881e-05, - "loss": 0.7279, - "step": 86060 - }, - { - "epoch": 0.7608868615074524, - "grad_norm": 2.2005772590637207, - "learning_rate": 3.731855230820913e-05, - "loss": 0.5966, - "step": 86070 - }, - { - "epoch": 0.7609752647677647, - "grad_norm": 13.298222541809082, - "learning_rate": 3.7317078920537256e-05, - "loss": 0.6331, - "step": 86080 - }, - { - "epoch": 0.7610636680280769, - "grad_norm": 9.629752159118652, - "learning_rate": 3.731560553286539e-05, - "loss": 0.7111, - "step": 86090 - }, - { - "epoch": 0.7611520712883891, - "grad_norm": 10.915865898132324, - "learning_rate": 3.731413214519351e-05, - "loss": 0.6354, - "step": 86100 - }, - { - "epoch": 0.7612404745487014, - "grad_norm": 9.142168045043945, - "learning_rate": 3.731265875752165e-05, - "loss": 0.6389, - "step": 86110 - }, - { - "epoch": 0.7613288778090136, - "grad_norm": 2.2176826000213623, - "learning_rate": 3.7311185369849776e-05, - "loss": 0.7594, - "step": 86120 - }, - { - "epoch": 0.7614172810693258, - "grad_norm": 3.262695789337158, - "learning_rate": 3.7309711982177904e-05, - "loss": 0.6558, - "step": 86130 - }, - { - "epoch": 0.761505684329638, - "grad_norm": 11.032332420349121, - "learning_rate": 3.730823859450603e-05, - "loss": 0.5876, - "step": 86140 - }, - { - "epoch": 0.7615940875899503, - "grad_norm": 1.451194167137146, - "learning_rate": 3.730676520683417e-05, - "loss": 0.6445, - "step": 86150 - }, - { - "epoch": 0.7616824908502625, - "grad_norm": 1.378336787223816, - "learning_rate": 3.730529181916229e-05, - "loss": 0.5879, - "step": 86160 - }, - { - "epoch": 0.7617708941105747, - "grad_norm": 7.298768520355225, - "learning_rate": 3.7303818431490424e-05, - "loss": 0.6221, - "step": 86170 - }, - { - "epoch": 0.7618592973708871, - "grad_norm": 2.8144688606262207, - "learning_rate": 3.7302345043818546e-05, - "loss": 0.7999, - "step": 86180 - }, - { - "epoch": 0.7619477006311993, - "grad_norm": 2.1327362060546875, - "learning_rate": 3.730087165614668e-05, - "loss": 0.7028, - "step": 86190 - }, - { - "epoch": 0.7620361038915116, - "grad_norm": 9.01235294342041, - "learning_rate": 3.729939826847481e-05, - "loss": 0.6731, - "step": 86200 - }, - { - "epoch": 0.7621245071518238, - "grad_norm": 0.987602174282074, - "learning_rate": 3.729792488080294e-05, - "loss": 0.6702, - "step": 86210 - }, - { - "epoch": 0.762212910412136, - "grad_norm": 1.9932714700698853, - "learning_rate": 3.7296451493131066e-05, - "loss": 0.5742, - "step": 86220 - }, - { - "epoch": 0.7623013136724482, - "grad_norm": 2.0963876247406006, - "learning_rate": 3.72949781054592e-05, - "loss": 0.6345, - "step": 86230 - }, - { - "epoch": 0.7623897169327605, - "grad_norm": 3.216031312942505, - "learning_rate": 3.729350471778732e-05, - "loss": 0.7798, - "step": 86240 - }, - { - "epoch": 0.7624781201930727, - "grad_norm": 2.305607795715332, - "learning_rate": 3.729203133011546e-05, - "loss": 0.6234, - "step": 86250 - }, - { - "epoch": 0.7625665234533849, - "grad_norm": 1.9500572681427002, - "learning_rate": 3.7290557942443586e-05, - "loss": 0.597, - "step": 86260 - }, - { - "epoch": 0.7626549267136972, - "grad_norm": 0.8851110935211182, - "learning_rate": 3.7289084554771715e-05, - "loss": 0.5644, - "step": 86270 - }, - { - "epoch": 0.7627433299740094, - "grad_norm": 3.3796939849853516, - "learning_rate": 3.728761116709984e-05, - "loss": 0.7794, - "step": 86280 - }, - { - "epoch": 0.7628317332343216, - "grad_norm": 2.7242753505706787, - "learning_rate": 3.728613777942798e-05, - "loss": 0.6771, - "step": 86290 - }, - { - "epoch": 0.762920136494634, - "grad_norm": 1.906731128692627, - "learning_rate": 3.72846643917561e-05, - "loss": 0.5904, - "step": 86300 - }, - { - "epoch": 0.7630085397549462, - "grad_norm": 2.6325597763061523, - "learning_rate": 3.7283191004084235e-05, - "loss": 0.7392, - "step": 86310 - }, - { - "epoch": 0.7630969430152584, - "grad_norm": 3.9864730834960938, - "learning_rate": 3.7281717616412356e-05, - "loss": 0.5748, - "step": 86320 - }, - { - "epoch": 0.7631853462755707, - "grad_norm": 1.0226085186004639, - "learning_rate": 3.728024422874049e-05, - "loss": 0.6245, - "step": 86330 - }, - { - "epoch": 0.7632737495358829, - "grad_norm": 2.6433935165405273, - "learning_rate": 3.727877084106862e-05, - "loss": 0.601, - "step": 86340 - }, - { - "epoch": 0.7633621527961951, - "grad_norm": 7.856193542480469, - "learning_rate": 3.727729745339675e-05, - "loss": 0.8347, - "step": 86350 - }, - { - "epoch": 0.7634505560565074, - "grad_norm": 2.9784162044525146, - "learning_rate": 3.7275824065724877e-05, - "loss": 0.5236, - "step": 86360 - }, - { - "epoch": 0.7635389593168196, - "grad_norm": 3.378016710281372, - "learning_rate": 3.727435067805301e-05, - "loss": 0.6383, - "step": 86370 - }, - { - "epoch": 0.7636273625771318, - "grad_norm": 1.6524105072021484, - "learning_rate": 3.727287729038113e-05, - "loss": 0.6702, - "step": 86380 - }, - { - "epoch": 0.763715765837444, - "grad_norm": 6.8003435134887695, - "learning_rate": 3.727140390270927e-05, - "loss": 0.697, - "step": 86390 - }, - { - "epoch": 0.7638041690977563, - "grad_norm": 1.075705647468567, - "learning_rate": 3.72699305150374e-05, - "loss": 0.6677, - "step": 86400 - }, - { - "epoch": 0.7638925723580685, - "grad_norm": 4.592240810394287, - "learning_rate": 3.7268457127365525e-05, - "loss": 0.6444, - "step": 86410 - }, - { - "epoch": 0.7639809756183809, - "grad_norm": 1.935365080833435, - "learning_rate": 3.7266983739693653e-05, - "loss": 0.677, - "step": 86420 - }, - { - "epoch": 0.7640693788786931, - "grad_norm": 1.6860672235488892, - "learning_rate": 3.726551035202178e-05, - "loss": 0.5884, - "step": 86430 - }, - { - "epoch": 0.7641577821390053, - "grad_norm": 1.514349102973938, - "learning_rate": 3.726403696434991e-05, - "loss": 0.7351, - "step": 86440 - }, - { - "epoch": 0.7642461853993175, - "grad_norm": 1.893250584602356, - "learning_rate": 3.7262563576678045e-05, - "loss": 0.7859, - "step": 86450 - }, - { - "epoch": 0.7643345886596298, - "grad_norm": 6.0112385749816895, - "learning_rate": 3.7261090189006174e-05, - "loss": 0.5757, - "step": 86460 - }, - { - "epoch": 0.764422991919942, - "grad_norm": 7.275670528411865, - "learning_rate": 3.72596168013343e-05, - "loss": 0.6174, - "step": 86470 - }, - { - "epoch": 0.7645113951802542, - "grad_norm": 3.070352077484131, - "learning_rate": 3.725814341366243e-05, - "loss": 0.6087, - "step": 86480 - }, - { - "epoch": 0.7645997984405665, - "grad_norm": 3.509242296218872, - "learning_rate": 3.725667002599056e-05, - "loss": 0.5485, - "step": 86490 - }, - { - "epoch": 0.7646882017008787, - "grad_norm": 9.614588737487793, - "learning_rate": 3.725519663831869e-05, - "loss": 0.6283, - "step": 86500 - }, - { - "epoch": 0.7647766049611909, - "grad_norm": 3.4603703022003174, - "learning_rate": 3.725372325064682e-05, - "loss": 0.6028, - "step": 86510 - }, - { - "epoch": 0.7648650082215032, - "grad_norm": 4.604862689971924, - "learning_rate": 3.725224986297495e-05, - "loss": 0.6593, - "step": 86520 - }, - { - "epoch": 0.7649534114818155, - "grad_norm": 2.411860466003418, - "learning_rate": 3.725077647530308e-05, - "loss": 0.6644, - "step": 86530 - }, - { - "epoch": 0.7650418147421277, - "grad_norm": 6.114974021911621, - "learning_rate": 3.724930308763121e-05, - "loss": 0.665, - "step": 86540 - }, - { - "epoch": 0.76513021800244, - "grad_norm": 1.5121123790740967, - "learning_rate": 3.7247829699959336e-05, - "loss": 0.7448, - "step": 86550 - }, - { - "epoch": 0.7652186212627522, - "grad_norm": 13.443902969360352, - "learning_rate": 3.7246356312287464e-05, - "loss": 0.6595, - "step": 86560 - }, - { - "epoch": 0.7653070245230644, - "grad_norm": 10.518452644348145, - "learning_rate": 3.724488292461559e-05, - "loss": 0.6494, - "step": 86570 - }, - { - "epoch": 0.7653954277833767, - "grad_norm": 3.0383522510528564, - "learning_rate": 3.724340953694373e-05, - "loss": 0.7022, - "step": 86580 - }, - { - "epoch": 0.7654838310436889, - "grad_norm": 0.9191209673881531, - "learning_rate": 3.7241936149271856e-05, - "loss": 0.5543, - "step": 86590 - }, - { - "epoch": 0.7655722343040011, - "grad_norm": 2.0296833515167236, - "learning_rate": 3.7240462761599984e-05, - "loss": 0.6574, - "step": 86600 - }, - { - "epoch": 0.7656606375643134, - "grad_norm": 4.197606086730957, - "learning_rate": 3.723898937392811e-05, - "loss": 0.6363, - "step": 86610 - }, - { - "epoch": 0.7657490408246256, - "grad_norm": 2.9814839363098145, - "learning_rate": 3.723751598625624e-05, - "loss": 0.7775, - "step": 86620 - }, - { - "epoch": 0.7658374440849378, - "grad_norm": 1.5551029443740845, - "learning_rate": 3.723604259858437e-05, - "loss": 0.6589, - "step": 86630 - }, - { - "epoch": 0.76592584734525, - "grad_norm": 2.176035165786743, - "learning_rate": 3.7234569210912504e-05, - "loss": 0.6282, - "step": 86640 - }, - { - "epoch": 0.7660142506055624, - "grad_norm": 7.538548469543457, - "learning_rate": 3.7233095823240626e-05, - "loss": 0.7455, - "step": 86650 - }, - { - "epoch": 0.7661026538658746, - "grad_norm": 2.308427572250366, - "learning_rate": 3.723162243556876e-05, - "loss": 0.5778, - "step": 86660 - }, - { - "epoch": 0.7661910571261868, - "grad_norm": 3.6587073802948, - "learning_rate": 3.723014904789689e-05, - "loss": 0.5609, - "step": 86670 - }, - { - "epoch": 0.7662794603864991, - "grad_norm": 6.131343364715576, - "learning_rate": 3.722867566022502e-05, - "loss": 0.7635, - "step": 86680 - }, - { - "epoch": 0.7663678636468113, - "grad_norm": 2.838127374649048, - "learning_rate": 3.7227202272553146e-05, - "loss": 0.6712, - "step": 86690 - }, - { - "epoch": 0.7664562669071235, - "grad_norm": 4.57146692276001, - "learning_rate": 3.722572888488128e-05, - "loss": 0.5437, - "step": 86700 - }, - { - "epoch": 0.7665446701674358, - "grad_norm": 1.599280595779419, - "learning_rate": 3.72242554972094e-05, - "loss": 0.7483, - "step": 86710 - }, - { - "epoch": 0.766633073427748, - "grad_norm": 3.5061333179473877, - "learning_rate": 3.722278210953754e-05, - "loss": 0.6974, - "step": 86720 - }, - { - "epoch": 0.7667214766880602, - "grad_norm": 1.9312424659729004, - "learning_rate": 3.7221308721865666e-05, - "loss": 0.7671, - "step": 86730 - }, - { - "epoch": 0.7668098799483725, - "grad_norm": 11.15473461151123, - "learning_rate": 3.7219835334193795e-05, - "loss": 0.5768, - "step": 86740 - }, - { - "epoch": 0.7668982832086847, - "grad_norm": 1.9385086297988892, - "learning_rate": 3.721836194652192e-05, - "loss": 0.5882, - "step": 86750 - }, - { - "epoch": 0.7669866864689969, - "grad_norm": 2.1111316680908203, - "learning_rate": 3.721688855885006e-05, - "loss": 0.6912, - "step": 86760 - }, - { - "epoch": 0.7670750897293093, - "grad_norm": 6.412662506103516, - "learning_rate": 3.721541517117818e-05, - "loss": 0.6348, - "step": 86770 - }, - { - "epoch": 0.7671634929896215, - "grad_norm": 2.2824347019195557, - "learning_rate": 3.7213941783506315e-05, - "loss": 0.6453, - "step": 86780 - }, - { - "epoch": 0.7672518962499337, - "grad_norm": 4.151925086975098, - "learning_rate": 3.7212468395834436e-05, - "loss": 0.688, - "step": 86790 - }, - { - "epoch": 0.767340299510246, - "grad_norm": 1.4501419067382812, - "learning_rate": 3.721099500816257e-05, - "loss": 0.7009, - "step": 86800 - }, - { - "epoch": 0.7674287027705582, - "grad_norm": 5.784538269042969, - "learning_rate": 3.72095216204907e-05, - "loss": 0.6477, - "step": 86810 - }, - { - "epoch": 0.7675171060308704, - "grad_norm": 3.613633632659912, - "learning_rate": 3.720804823281883e-05, - "loss": 0.5474, - "step": 86820 - }, - { - "epoch": 0.7676055092911827, - "grad_norm": 1.507540225982666, - "learning_rate": 3.7206574845146957e-05, - "loss": 0.5756, - "step": 86830 - }, - { - "epoch": 0.7676939125514949, - "grad_norm": 2.6359832286834717, - "learning_rate": 3.720510145747509e-05, - "loss": 0.6383, - "step": 86840 - }, - { - "epoch": 0.7677823158118071, - "grad_norm": 5.860042095184326, - "learning_rate": 3.720362806980321e-05, - "loss": 0.6709, - "step": 86850 - }, - { - "epoch": 0.7678707190721193, - "grad_norm": 4.933174133300781, - "learning_rate": 3.720215468213135e-05, - "loss": 0.7577, - "step": 86860 - }, - { - "epoch": 0.7679591223324316, - "grad_norm": 4.768065929412842, - "learning_rate": 3.720068129445948e-05, - "loss": 0.5741, - "step": 86870 - }, - { - "epoch": 0.7680475255927438, - "grad_norm": 3.4530088901519775, - "learning_rate": 3.7199207906787605e-05, - "loss": 0.692, - "step": 86880 - }, - { - "epoch": 0.7681359288530561, - "grad_norm": 2.5492825508117676, - "learning_rate": 3.7197734519115734e-05, - "loss": 0.6823, - "step": 86890 - }, - { - "epoch": 0.7682243321133684, - "grad_norm": 4.962037563323975, - "learning_rate": 3.719626113144386e-05, - "loss": 0.6342, - "step": 86900 - }, - { - "epoch": 0.7683127353736806, - "grad_norm": 4.782608509063721, - "learning_rate": 3.719478774377199e-05, - "loss": 0.5894, - "step": 86910 - }, - { - "epoch": 0.7684011386339928, - "grad_norm": 12.531340599060059, - "learning_rate": 3.7193314356100125e-05, - "loss": 0.729, - "step": 86920 - }, - { - "epoch": 0.7684895418943051, - "grad_norm": 3.969730854034424, - "learning_rate": 3.719184096842825e-05, - "loss": 0.6603, - "step": 86930 - }, - { - "epoch": 0.7685779451546173, - "grad_norm": 6.431152820587158, - "learning_rate": 3.719036758075638e-05, - "loss": 0.6721, - "step": 86940 - }, - { - "epoch": 0.7686663484149295, - "grad_norm": 1.359704852104187, - "learning_rate": 3.718889419308451e-05, - "loss": 0.6226, - "step": 86950 - }, - { - "epoch": 0.7687547516752418, - "grad_norm": 1.1664873361587524, - "learning_rate": 3.718742080541264e-05, - "loss": 0.5415, - "step": 86960 - }, - { - "epoch": 0.768843154935554, - "grad_norm": 5.02387809753418, - "learning_rate": 3.718594741774077e-05, - "loss": 0.5928, - "step": 86970 - }, - { - "epoch": 0.7689315581958662, - "grad_norm": 1.8267531394958496, - "learning_rate": 3.71844740300689e-05, - "loss": 0.535, - "step": 86980 - }, - { - "epoch": 0.7690199614561785, - "grad_norm": 1.6625699996948242, - "learning_rate": 3.7183000642397024e-05, - "loss": 0.721, - "step": 86990 - }, - { - "epoch": 0.7691083647164907, - "grad_norm": 0.972973108291626, - "learning_rate": 3.718152725472516e-05, - "loss": 0.6983, - "step": 87000 - }, - { - "epoch": 0.769196767976803, - "grad_norm": 8.324231147766113, - "learning_rate": 3.718005386705328e-05, - "loss": 0.6716, - "step": 87010 - }, - { - "epoch": 0.7692851712371153, - "grad_norm": 2.3789448738098145, - "learning_rate": 3.7178580479381416e-05, - "loss": 0.5998, - "step": 87020 - }, - { - "epoch": 0.7693735744974275, - "grad_norm": 2.331791877746582, - "learning_rate": 3.7177107091709544e-05, - "loss": 0.716, - "step": 87030 - }, - { - "epoch": 0.7694619777577397, - "grad_norm": 7.855481147766113, - "learning_rate": 3.717563370403767e-05, - "loss": 0.7022, - "step": 87040 - }, - { - "epoch": 0.769550381018052, - "grad_norm": 4.423086166381836, - "learning_rate": 3.71741603163658e-05, - "loss": 0.6298, - "step": 87050 - }, - { - "epoch": 0.7696387842783642, - "grad_norm": 2.639465808868408, - "learning_rate": 3.7172686928693936e-05, - "loss": 0.7328, - "step": 87060 - }, - { - "epoch": 0.7697271875386764, - "grad_norm": 6.835954189300537, - "learning_rate": 3.717121354102206e-05, - "loss": 0.6145, - "step": 87070 - }, - { - "epoch": 0.7698155907989886, - "grad_norm": 3.1799473762512207, - "learning_rate": 3.716974015335019e-05, - "loss": 0.6323, - "step": 87080 - }, - { - "epoch": 0.7699039940593009, - "grad_norm": 1.1374777555465698, - "learning_rate": 3.716826676567832e-05, - "loss": 0.6963, - "step": 87090 - }, - { - "epoch": 0.7699923973196131, - "grad_norm": 9.492671012878418, - "learning_rate": 3.716679337800645e-05, - "loss": 0.643, - "step": 87100 - }, - { - "epoch": 0.7700808005799253, - "grad_norm": 2.9431264400482178, - "learning_rate": 3.716531999033458e-05, - "loss": 0.6186, - "step": 87110 - }, - { - "epoch": 0.7701692038402377, - "grad_norm": 8.265769004821777, - "learning_rate": 3.7163846602662706e-05, - "loss": 0.7168, - "step": 87120 - }, - { - "epoch": 0.7702576071005499, - "grad_norm": 5.6462788581848145, - "learning_rate": 3.7162373214990834e-05, - "loss": 0.6878, - "step": 87130 - }, - { - "epoch": 0.7703460103608621, - "grad_norm": 1.2095264196395874, - "learning_rate": 3.716089982731897e-05, - "loss": 0.6237, - "step": 87140 - }, - { - "epoch": 0.7704344136211744, - "grad_norm": 1.564755916595459, - "learning_rate": 3.715942643964709e-05, - "loss": 0.6208, - "step": 87150 - }, - { - "epoch": 0.7705228168814866, - "grad_norm": 8.673503875732422, - "learning_rate": 3.7157953051975226e-05, - "loss": 0.6998, - "step": 87160 - }, - { - "epoch": 0.7706112201417988, - "grad_norm": 4.018807888031006, - "learning_rate": 3.7156479664303355e-05, - "loss": 0.8663, - "step": 87170 - }, - { - "epoch": 0.7706996234021111, - "grad_norm": 3.4271037578582764, - "learning_rate": 3.715500627663148e-05, - "loss": 0.5504, - "step": 87180 - }, - { - "epoch": 0.7707880266624233, - "grad_norm": 2.202488899230957, - "learning_rate": 3.715353288895961e-05, - "loss": 0.6589, - "step": 87190 - }, - { - "epoch": 0.7708764299227355, - "grad_norm": 0.8874525427818298, - "learning_rate": 3.7152059501287746e-05, - "loss": 0.8064, - "step": 87200 - }, - { - "epoch": 0.7709648331830478, - "grad_norm": 5.62076473236084, - "learning_rate": 3.715058611361587e-05, - "loss": 0.7078, - "step": 87210 - }, - { - "epoch": 0.77105323644336, - "grad_norm": 3.08376407623291, - "learning_rate": 3.7149112725944e-05, - "loss": 0.6404, - "step": 87220 - }, - { - "epoch": 0.7711416397036722, - "grad_norm": 2.968723773956299, - "learning_rate": 3.714763933827213e-05, - "loss": 0.741, - "step": 87230 - }, - { - "epoch": 0.7712300429639846, - "grad_norm": 3.1863155364990234, - "learning_rate": 3.714616595060026e-05, - "loss": 0.6871, - "step": 87240 - }, - { - "epoch": 0.7713184462242968, - "grad_norm": 2.1257872581481934, - "learning_rate": 3.714469256292839e-05, - "loss": 0.6134, - "step": 87250 - }, - { - "epoch": 0.771406849484609, - "grad_norm": 3.2810986042022705, - "learning_rate": 3.7143219175256516e-05, - "loss": 0.5887, - "step": 87260 - }, - { - "epoch": 0.7714952527449213, - "grad_norm": 3.897430419921875, - "learning_rate": 3.7141745787584645e-05, - "loss": 0.5798, - "step": 87270 - }, - { - "epoch": 0.7715836560052335, - "grad_norm": 1.7576478719711304, - "learning_rate": 3.714027239991278e-05, - "loss": 0.6754, - "step": 87280 - }, - { - "epoch": 0.7716720592655457, - "grad_norm": 1.6312427520751953, - "learning_rate": 3.71387990122409e-05, - "loss": 0.5702, - "step": 87290 - }, - { - "epoch": 0.771760462525858, - "grad_norm": 6.856921672821045, - "learning_rate": 3.713732562456904e-05, - "loss": 0.6354, - "step": 87300 - }, - { - "epoch": 0.7718488657861702, - "grad_norm": 2.8430116176605225, - "learning_rate": 3.7135852236897165e-05, - "loss": 0.6825, - "step": 87310 - }, - { - "epoch": 0.7719372690464824, - "grad_norm": 1.7134850025177002, - "learning_rate": 3.7134378849225293e-05, - "loss": 0.6339, - "step": 87320 - }, - { - "epoch": 0.7720256723067946, - "grad_norm": 2.7168121337890625, - "learning_rate": 3.713290546155342e-05, - "loss": 0.648, - "step": 87330 - }, - { - "epoch": 0.7721140755671069, - "grad_norm": 3.56308650970459, - "learning_rate": 3.713143207388156e-05, - "loss": 0.7647, - "step": 87340 - }, - { - "epoch": 0.7722024788274191, - "grad_norm": 2.641613721847534, - "learning_rate": 3.712995868620968e-05, - "loss": 0.6659, - "step": 87350 - }, - { - "epoch": 0.7722908820877314, - "grad_norm": 4.132381916046143, - "learning_rate": 3.7128485298537814e-05, - "loss": 0.785, - "step": 87360 - }, - { - "epoch": 0.7723792853480437, - "grad_norm": 4.496201992034912, - "learning_rate": 3.712701191086594e-05, - "loss": 0.7201, - "step": 87370 - }, - { - "epoch": 0.7724676886083559, - "grad_norm": 2.0849084854125977, - "learning_rate": 3.712553852319407e-05, - "loss": 0.6724, - "step": 87380 - }, - { - "epoch": 0.7725560918686681, - "grad_norm": 2.197014331817627, - "learning_rate": 3.71240651355222e-05, - "loss": 0.6605, - "step": 87390 - }, - { - "epoch": 0.7726444951289804, - "grad_norm": 3.115480422973633, - "learning_rate": 3.712259174785033e-05, - "loss": 0.6514, - "step": 87400 - }, - { - "epoch": 0.7727328983892926, - "grad_norm": 3.5521392822265625, - "learning_rate": 3.7121118360178455e-05, - "loss": 0.5559, - "step": 87410 - }, - { - "epoch": 0.7728213016496048, - "grad_norm": 3.6806702613830566, - "learning_rate": 3.711964497250659e-05, - "loss": 0.6868, - "step": 87420 - }, - { - "epoch": 0.7729097049099171, - "grad_norm": 2.708115339279175, - "learning_rate": 3.711817158483472e-05, - "loss": 0.6085, - "step": 87430 - }, - { - "epoch": 0.7729981081702293, - "grad_norm": 3.2188241481781006, - "learning_rate": 3.711669819716285e-05, - "loss": 0.5919, - "step": 87440 - }, - { - "epoch": 0.7730865114305415, - "grad_norm": 9.90634536743164, - "learning_rate": 3.7115224809490976e-05, - "loss": 0.6776, - "step": 87450 - }, - { - "epoch": 0.7731749146908538, - "grad_norm": 8.803030967712402, - "learning_rate": 3.7113751421819104e-05, - "loss": 0.6571, - "step": 87460 - }, - { - "epoch": 0.773263317951166, - "grad_norm": 5.876282691955566, - "learning_rate": 3.711227803414723e-05, - "loss": 0.7332, - "step": 87470 - }, - { - "epoch": 0.7733517212114783, - "grad_norm": 1.35496187210083, - "learning_rate": 3.711080464647536e-05, - "loss": 0.6929, - "step": 87480 - }, - { - "epoch": 0.7734401244717906, - "grad_norm": 2.2722551822662354, - "learning_rate": 3.7109331258803496e-05, - "loss": 0.6115, - "step": 87490 - }, - { - "epoch": 0.7735285277321028, - "grad_norm": 2.5263278484344482, - "learning_rate": 3.7107857871131624e-05, - "loss": 0.703, - "step": 87500 - }, - { - "epoch": 0.773616930992415, - "grad_norm": 2.4253995418548584, - "learning_rate": 3.710638448345975e-05, - "loss": 0.6902, - "step": 87510 - }, - { - "epoch": 0.7737053342527273, - "grad_norm": 4.172402858734131, - "learning_rate": 3.710491109578788e-05, - "loss": 0.7145, - "step": 87520 - }, - { - "epoch": 0.7737937375130395, - "grad_norm": 1.2479969263076782, - "learning_rate": 3.710343770811601e-05, - "loss": 0.5595, - "step": 87530 - }, - { - "epoch": 0.7738821407733517, - "grad_norm": 1.5717514753341675, - "learning_rate": 3.710196432044414e-05, - "loss": 0.6057, - "step": 87540 - }, - { - "epoch": 0.7739705440336639, - "grad_norm": 3.5647497177124023, - "learning_rate": 3.710049093277227e-05, - "loss": 0.6811, - "step": 87550 - }, - { - "epoch": 0.7740589472939762, - "grad_norm": 2.8100178241729736, - "learning_rate": 3.70990175451004e-05, - "loss": 0.6874, - "step": 87560 - }, - { - "epoch": 0.7741473505542884, - "grad_norm": 1.2704204320907593, - "learning_rate": 3.709754415742853e-05, - "loss": 0.5757, - "step": 87570 - }, - { - "epoch": 0.7742357538146006, - "grad_norm": 12.480536460876465, - "learning_rate": 3.709607076975666e-05, - "loss": 0.6589, - "step": 87580 - }, - { - "epoch": 0.774324157074913, - "grad_norm": 1.3453384637832642, - "learning_rate": 3.7094597382084786e-05, - "loss": 0.6775, - "step": 87590 - }, - { - "epoch": 0.7744125603352252, - "grad_norm": 5.988736152648926, - "learning_rate": 3.7093123994412914e-05, - "loss": 0.7738, - "step": 87600 - }, - { - "epoch": 0.7745009635955374, - "grad_norm": 3.0035417079925537, - "learning_rate": 3.709165060674105e-05, - "loss": 0.6694, - "step": 87610 - }, - { - "epoch": 0.7745893668558497, - "grad_norm": 4.275324821472168, - "learning_rate": 3.709017721906917e-05, - "loss": 0.6089, - "step": 87620 - }, - { - "epoch": 0.7746777701161619, - "grad_norm": 2.202371120452881, - "learning_rate": 3.7088703831397306e-05, - "loss": 0.7254, - "step": 87630 - }, - { - "epoch": 0.7747661733764741, - "grad_norm": 6.4135212898254395, - "learning_rate": 3.7087230443725435e-05, - "loss": 0.6861, - "step": 87640 - }, - { - "epoch": 0.7748545766367864, - "grad_norm": 1.285910964012146, - "learning_rate": 3.708575705605356e-05, - "loss": 0.6832, - "step": 87650 - }, - { - "epoch": 0.7749429798970986, - "grad_norm": 4.480310916900635, - "learning_rate": 3.708428366838169e-05, - "loss": 0.6222, - "step": 87660 - }, - { - "epoch": 0.7750313831574108, - "grad_norm": 5.771284580230713, - "learning_rate": 3.7082810280709826e-05, - "loss": 0.5067, - "step": 87670 - }, - { - "epoch": 0.7751197864177231, - "grad_norm": 9.911836624145508, - "learning_rate": 3.708133689303795e-05, - "loss": 0.6686, - "step": 87680 - }, - { - "epoch": 0.7752081896780353, - "grad_norm": 1.3472927808761597, - "learning_rate": 3.707986350536608e-05, - "loss": 0.6423, - "step": 87690 - }, - { - "epoch": 0.7752965929383475, - "grad_norm": 1.7603410482406616, - "learning_rate": 3.707839011769421e-05, - "loss": 0.7479, - "step": 87700 - }, - { - "epoch": 0.7753849961986599, - "grad_norm": 5.905195713043213, - "learning_rate": 3.707691673002234e-05, - "loss": 0.6705, - "step": 87710 - }, - { - "epoch": 0.7754733994589721, - "grad_norm": 4.155801296234131, - "learning_rate": 3.707544334235047e-05, - "loss": 0.7596, - "step": 87720 - }, - { - "epoch": 0.7755618027192843, - "grad_norm": 3.1507489681243896, - "learning_rate": 3.7073969954678597e-05, - "loss": 0.6666, - "step": 87730 - }, - { - "epoch": 0.7756502059795966, - "grad_norm": 1.6494165658950806, - "learning_rate": 3.7072496567006725e-05, - "loss": 0.6231, - "step": 87740 - }, - { - "epoch": 0.7757386092399088, - "grad_norm": 2.3226284980773926, - "learning_rate": 3.707102317933486e-05, - "loss": 0.765, - "step": 87750 - }, - { - "epoch": 0.775827012500221, - "grad_norm": 6.424279689788818, - "learning_rate": 3.706954979166298e-05, - "loss": 0.6071, - "step": 87760 - }, - { - "epoch": 0.7759154157605332, - "grad_norm": 3.302466869354248, - "learning_rate": 3.706807640399112e-05, - "loss": 0.736, - "step": 87770 - }, - { - "epoch": 0.7760038190208455, - "grad_norm": 1.4127602577209473, - "learning_rate": 3.7066603016319245e-05, - "loss": 0.6367, - "step": 87780 - }, - { - "epoch": 0.7760922222811577, - "grad_norm": 4.607992649078369, - "learning_rate": 3.7065129628647373e-05, - "loss": 0.5764, - "step": 87790 - }, - { - "epoch": 0.7761806255414699, - "grad_norm": 2.244208574295044, - "learning_rate": 3.70636562409755e-05, - "loss": 0.6067, - "step": 87800 - }, - { - "epoch": 0.7762690288017822, - "grad_norm": 2.9210565090179443, - "learning_rate": 3.706218285330364e-05, - "loss": 0.6236, - "step": 87810 - }, - { - "epoch": 0.7763574320620944, - "grad_norm": 0.6639277935028076, - "learning_rate": 3.706070946563176e-05, - "loss": 0.7391, - "step": 87820 - }, - { - "epoch": 0.7764458353224067, - "grad_norm": 4.9028520584106445, - "learning_rate": 3.7059236077959894e-05, - "loss": 0.6178, - "step": 87830 - }, - { - "epoch": 0.776534238582719, - "grad_norm": 6.615703105926514, - "learning_rate": 3.7057762690288015e-05, - "loss": 0.7192, - "step": 87840 - }, - { - "epoch": 0.7766226418430312, - "grad_norm": 3.8918676376342773, - "learning_rate": 3.705628930261615e-05, - "loss": 0.6397, - "step": 87850 - }, - { - "epoch": 0.7767110451033434, - "grad_norm": 2.080568313598633, - "learning_rate": 3.705481591494428e-05, - "loss": 0.7087, - "step": 87860 - }, - { - "epoch": 0.7767994483636557, - "grad_norm": 3.0624096393585205, - "learning_rate": 3.705334252727241e-05, - "loss": 0.6014, - "step": 87870 - }, - { - "epoch": 0.7768878516239679, - "grad_norm": 4.928555011749268, - "learning_rate": 3.7051869139600535e-05, - "loss": 0.5826, - "step": 87880 - }, - { - "epoch": 0.7769762548842801, - "grad_norm": 2.02699613571167, - "learning_rate": 3.705039575192867e-05, - "loss": 0.6875, - "step": 87890 - }, - { - "epoch": 0.7770646581445924, - "grad_norm": 4.903561115264893, - "learning_rate": 3.704892236425679e-05, - "loss": 0.619, - "step": 87900 - }, - { - "epoch": 0.7771530614049046, - "grad_norm": 1.723884105682373, - "learning_rate": 3.704744897658493e-05, - "loss": 0.5838, - "step": 87910 - }, - { - "epoch": 0.7772414646652168, - "grad_norm": 6.695972442626953, - "learning_rate": 3.7045975588913056e-05, - "loss": 0.7628, - "step": 87920 - }, - { - "epoch": 0.777329867925529, - "grad_norm": 2.024143934249878, - "learning_rate": 3.7044502201241184e-05, - "loss": 0.6228, - "step": 87930 - }, - { - "epoch": 0.7774182711858413, - "grad_norm": 2.132025718688965, - "learning_rate": 3.704302881356931e-05, - "loss": 0.731, - "step": 87940 - }, - { - "epoch": 0.7775066744461536, - "grad_norm": 7.058281898498535, - "learning_rate": 3.704155542589744e-05, - "loss": 0.6744, - "step": 87950 - }, - { - "epoch": 0.7775950777064659, - "grad_norm": 1.7809114456176758, - "learning_rate": 3.704008203822557e-05, - "loss": 0.6232, - "step": 87960 - }, - { - "epoch": 0.7776834809667781, - "grad_norm": 3.7689127922058105, - "learning_rate": 3.7038608650553704e-05, - "loss": 0.6386, - "step": 87970 - }, - { - "epoch": 0.7777718842270903, - "grad_norm": 5.322371006011963, - "learning_rate": 3.7037135262881826e-05, - "loss": 0.7953, - "step": 87980 - }, - { - "epoch": 0.7778602874874025, - "grad_norm": 1.9489524364471436, - "learning_rate": 3.703566187520996e-05, - "loss": 0.6845, - "step": 87990 - }, - { - "epoch": 0.7779486907477148, - "grad_norm": 4.435844421386719, - "learning_rate": 3.703418848753809e-05, - "loss": 0.7268, - "step": 88000 - }, - { - "epoch": 0.778037094008027, - "grad_norm": 5.996851921081543, - "learning_rate": 3.703271509986622e-05, - "loss": 0.5489, - "step": 88010 - }, - { - "epoch": 0.7781254972683392, - "grad_norm": 3.0234906673431396, - "learning_rate": 3.7031241712194346e-05, - "loss": 0.5764, - "step": 88020 - }, - { - "epoch": 0.7782139005286515, - "grad_norm": 3.921184778213501, - "learning_rate": 3.702976832452248e-05, - "loss": 0.5819, - "step": 88030 - }, - { - "epoch": 0.7783023037889637, - "grad_norm": 14.208062171936035, - "learning_rate": 3.70282949368506e-05, - "loss": 0.5787, - "step": 88040 - }, - { - "epoch": 0.7783907070492759, - "grad_norm": 2.463609218597412, - "learning_rate": 3.702682154917874e-05, - "loss": 0.7069, - "step": 88050 - }, - { - "epoch": 0.7784791103095882, - "grad_norm": 1.9219471216201782, - "learning_rate": 3.7025348161506866e-05, - "loss": 0.6457, - "step": 88060 - }, - { - "epoch": 0.7785675135699005, - "grad_norm": 3.2396671772003174, - "learning_rate": 3.7023874773834994e-05, - "loss": 0.6789, - "step": 88070 - }, - { - "epoch": 0.7786559168302127, - "grad_norm": 3.904404640197754, - "learning_rate": 3.702240138616312e-05, - "loss": 0.7621, - "step": 88080 - }, - { - "epoch": 0.778744320090525, - "grad_norm": 4.983608245849609, - "learning_rate": 3.702092799849125e-05, - "loss": 0.6982, - "step": 88090 - }, - { - "epoch": 0.7788327233508372, - "grad_norm": 3.2792069911956787, - "learning_rate": 3.701945461081938e-05, - "loss": 0.6748, - "step": 88100 - }, - { - "epoch": 0.7789211266111494, - "grad_norm": 2.070518970489502, - "learning_rate": 3.7017981223147515e-05, - "loss": 0.6089, - "step": 88110 - }, - { - "epoch": 0.7790095298714617, - "grad_norm": 2.015944719314575, - "learning_rate": 3.7016507835475636e-05, - "loss": 0.6293, - "step": 88120 - }, - { - "epoch": 0.7790979331317739, - "grad_norm": 3.026904821395874, - "learning_rate": 3.701503444780377e-05, - "loss": 0.7415, - "step": 88130 - }, - { - "epoch": 0.7791863363920861, - "grad_norm": 1.4083503484725952, - "learning_rate": 3.70135610601319e-05, - "loss": 0.6926, - "step": 88140 - }, - { - "epoch": 0.7792747396523984, - "grad_norm": 4.103747844696045, - "learning_rate": 3.701208767246003e-05, - "loss": 0.7771, - "step": 88150 - }, - { - "epoch": 0.7793631429127106, - "grad_norm": 4.715278625488281, - "learning_rate": 3.7010614284788156e-05, - "loss": 0.6279, - "step": 88160 - }, - { - "epoch": 0.7794515461730228, - "grad_norm": 8.26240062713623, - "learning_rate": 3.700914089711629e-05, - "loss": 0.5934, - "step": 88170 - }, - { - "epoch": 0.7795399494333352, - "grad_norm": 4.509095191955566, - "learning_rate": 3.700766750944441e-05, - "loss": 0.6107, - "step": 88180 - }, - { - "epoch": 0.7796283526936474, - "grad_norm": 2.47825288772583, - "learning_rate": 3.700619412177255e-05, - "loss": 0.6927, - "step": 88190 - }, - { - "epoch": 0.7797167559539596, - "grad_norm": 1.8736214637756348, - "learning_rate": 3.700472073410067e-05, - "loss": 0.583, - "step": 88200 - }, - { - "epoch": 0.7798051592142718, - "grad_norm": 2.698552131652832, - "learning_rate": 3.7003247346428805e-05, - "loss": 0.6544, - "step": 88210 - }, - { - "epoch": 0.7798935624745841, - "grad_norm": 2.763906955718994, - "learning_rate": 3.700177395875693e-05, - "loss": 0.6034, - "step": 88220 - }, - { - "epoch": 0.7799819657348963, - "grad_norm": 10.334351539611816, - "learning_rate": 3.700030057108506e-05, - "loss": 0.7124, - "step": 88230 - }, - { - "epoch": 0.7800703689952085, - "grad_norm": 2.847381114959717, - "learning_rate": 3.699882718341319e-05, - "loss": 0.7507, - "step": 88240 - }, - { - "epoch": 0.7801587722555208, - "grad_norm": 5.7208380699157715, - "learning_rate": 3.6997353795741325e-05, - "loss": 0.5929, - "step": 88250 - }, - { - "epoch": 0.780247175515833, - "grad_norm": 1.9132936000823975, - "learning_rate": 3.699588040806945e-05, - "loss": 0.6645, - "step": 88260 - }, - { - "epoch": 0.7803355787761452, - "grad_norm": 6.9606828689575195, - "learning_rate": 3.699440702039758e-05, - "loss": 0.6603, - "step": 88270 - }, - { - "epoch": 0.7804239820364575, - "grad_norm": 1.421900987625122, - "learning_rate": 3.699293363272571e-05, - "loss": 0.6757, - "step": 88280 - }, - { - "epoch": 0.7805123852967697, - "grad_norm": 8.137231826782227, - "learning_rate": 3.699146024505384e-05, - "loss": 0.6226, - "step": 88290 - }, - { - "epoch": 0.780600788557082, - "grad_norm": 4.930499076843262, - "learning_rate": 3.698998685738197e-05, - "loss": 0.8131, - "step": 88300 - }, - { - "epoch": 0.7806891918173943, - "grad_norm": 4.467109680175781, - "learning_rate": 3.6988513469710095e-05, - "loss": 0.6777, - "step": 88310 - }, - { - "epoch": 0.7807775950777065, - "grad_norm": 9.587776184082031, - "learning_rate": 3.6987040082038224e-05, - "loss": 0.6822, - "step": 88320 - }, - { - "epoch": 0.7808659983380187, - "grad_norm": 1.8515719175338745, - "learning_rate": 3.698556669436636e-05, - "loss": 0.7263, - "step": 88330 - }, - { - "epoch": 0.780954401598331, - "grad_norm": 6.098905086517334, - "learning_rate": 3.698409330669449e-05, - "loss": 0.5291, - "step": 88340 - }, - { - "epoch": 0.7810428048586432, - "grad_norm": 3.5075201988220215, - "learning_rate": 3.6982619919022615e-05, - "loss": 0.6244, - "step": 88350 - }, - { - "epoch": 0.7811312081189554, - "grad_norm": 2.036731719970703, - "learning_rate": 3.6981146531350744e-05, - "loss": 0.6066, - "step": 88360 - }, - { - "epoch": 0.7812196113792677, - "grad_norm": 2.9030609130859375, - "learning_rate": 3.697967314367887e-05, - "loss": 0.7719, - "step": 88370 - }, - { - "epoch": 0.7813080146395799, - "grad_norm": 1.686673641204834, - "learning_rate": 3.6978199756007e-05, - "loss": 0.6309, - "step": 88380 - }, - { - "epoch": 0.7813964178998921, - "grad_norm": 1.8342885971069336, - "learning_rate": 3.6976726368335136e-05, - "loss": 0.6344, - "step": 88390 - }, - { - "epoch": 0.7814848211602043, - "grad_norm": 2.6140074729919434, - "learning_rate": 3.6975252980663264e-05, - "loss": 0.8475, - "step": 88400 - }, - { - "epoch": 0.7815732244205166, - "grad_norm": 3.955953598022461, - "learning_rate": 3.697377959299139e-05, - "loss": 0.7072, - "step": 88410 - }, - { - "epoch": 0.7816616276808289, - "grad_norm": 2.344635248184204, - "learning_rate": 3.697230620531952e-05, - "loss": 0.6929, - "step": 88420 - }, - { - "epoch": 0.7817500309411411, - "grad_norm": 1.209633231163025, - "learning_rate": 3.697083281764765e-05, - "loss": 0.6304, - "step": 88430 - }, - { - "epoch": 0.7818384342014534, - "grad_norm": 5.430700302124023, - "learning_rate": 3.696935942997578e-05, - "loss": 0.6963, - "step": 88440 - }, - { - "epoch": 0.7819268374617656, - "grad_norm": 3.00968337059021, - "learning_rate": 3.6967886042303906e-05, - "loss": 0.6072, - "step": 88450 - }, - { - "epoch": 0.7820152407220778, - "grad_norm": 1.635097622871399, - "learning_rate": 3.696641265463204e-05, - "loss": 0.7084, - "step": 88460 - }, - { - "epoch": 0.7821036439823901, - "grad_norm": 1.3624483346939087, - "learning_rate": 3.696493926696017e-05, - "loss": 0.6762, - "step": 88470 - }, - { - "epoch": 0.7821920472427023, - "grad_norm": 31.46500587463379, - "learning_rate": 3.69634658792883e-05, - "loss": 0.6381, - "step": 88480 - }, - { - "epoch": 0.7822804505030145, - "grad_norm": 1.579077959060669, - "learning_rate": 3.6961992491616426e-05, - "loss": 0.6122, - "step": 88490 - }, - { - "epoch": 0.7823688537633268, - "grad_norm": 2.750282049179077, - "learning_rate": 3.6960519103944554e-05, - "loss": 0.5084, - "step": 88500 - }, - { - "epoch": 0.782457257023639, - "grad_norm": 10.721227645874023, - "learning_rate": 3.695904571627268e-05, - "loss": 0.7496, - "step": 88510 - }, - { - "epoch": 0.7825456602839512, - "grad_norm": 4.143172264099121, - "learning_rate": 3.695757232860082e-05, - "loss": 0.6698, - "step": 88520 - }, - { - "epoch": 0.7826340635442635, - "grad_norm": 2.3151283264160156, - "learning_rate": 3.6956098940928946e-05, - "loss": 0.7075, - "step": 88530 - }, - { - "epoch": 0.7827224668045758, - "grad_norm": 2.9645211696624756, - "learning_rate": 3.6954625553257075e-05, - "loss": 0.8017, - "step": 88540 - }, - { - "epoch": 0.782810870064888, - "grad_norm": 1.0456886291503906, - "learning_rate": 3.69531521655852e-05, - "loss": 0.6558, - "step": 88550 - }, - { - "epoch": 0.7828992733252003, - "grad_norm": 1.234302043914795, - "learning_rate": 3.695167877791333e-05, - "loss": 0.5816, - "step": 88560 - }, - { - "epoch": 0.7829876765855125, - "grad_norm": 1.1398594379425049, - "learning_rate": 3.695020539024146e-05, - "loss": 0.6601, - "step": 88570 - }, - { - "epoch": 0.7830760798458247, - "grad_norm": 4.491460800170898, - "learning_rate": 3.6948732002569595e-05, - "loss": 0.7649, - "step": 88580 - }, - { - "epoch": 0.783164483106137, - "grad_norm": 4.4944167137146, - "learning_rate": 3.6947258614897716e-05, - "loss": 0.5904, - "step": 88590 - }, - { - "epoch": 0.7832528863664492, - "grad_norm": 0.9227166771888733, - "learning_rate": 3.694578522722585e-05, - "loss": 0.6771, - "step": 88600 - }, - { - "epoch": 0.7833412896267614, - "grad_norm": 3.6446499824523926, - "learning_rate": 3.694431183955398e-05, - "loss": 0.6977, - "step": 88610 - }, - { - "epoch": 0.7834296928870736, - "grad_norm": 1.4419152736663818, - "learning_rate": 3.694283845188211e-05, - "loss": 0.6702, - "step": 88620 - }, - { - "epoch": 0.7835180961473859, - "grad_norm": 1.8561259508132935, - "learning_rate": 3.6941365064210237e-05, - "loss": 0.7333, - "step": 88630 - }, - { - "epoch": 0.7836064994076981, - "grad_norm": 3.7696709632873535, - "learning_rate": 3.693989167653837e-05, - "loss": 0.6795, - "step": 88640 - }, - { - "epoch": 0.7836949026680105, - "grad_norm": 3.3229572772979736, - "learning_rate": 3.693841828886649e-05, - "loss": 0.6587, - "step": 88650 - }, - { - "epoch": 0.7837833059283227, - "grad_norm": 4.2172346115112305, - "learning_rate": 3.693694490119463e-05, - "loss": 0.7052, - "step": 88660 - }, - { - "epoch": 0.7838717091886349, - "grad_norm": 1.510422706604004, - "learning_rate": 3.693547151352275e-05, - "loss": 0.5378, - "step": 88670 - }, - { - "epoch": 0.7839601124489471, - "grad_norm": 7.965168476104736, - "learning_rate": 3.6933998125850885e-05, - "loss": 0.7124, - "step": 88680 - }, - { - "epoch": 0.7840485157092594, - "grad_norm": 1.8420188426971436, - "learning_rate": 3.6932524738179013e-05, - "loss": 0.608, - "step": 88690 - }, - { - "epoch": 0.7841369189695716, - "grad_norm": 1.1057806015014648, - "learning_rate": 3.693105135050714e-05, - "loss": 0.6727, - "step": 88700 - }, - { - "epoch": 0.7842253222298838, - "grad_norm": 2.295884609222412, - "learning_rate": 3.692957796283527e-05, - "loss": 0.5431, - "step": 88710 - }, - { - "epoch": 0.7843137254901961, - "grad_norm": 2.478905200958252, - "learning_rate": 3.6928104575163405e-05, - "loss": 0.7279, - "step": 88720 - }, - { - "epoch": 0.7844021287505083, - "grad_norm": 2.1281604766845703, - "learning_rate": 3.692663118749153e-05, - "loss": 0.5643, - "step": 88730 - }, - { - "epoch": 0.7844905320108205, - "grad_norm": 2.78515625, - "learning_rate": 3.692515779981966e-05, - "loss": 0.5311, - "step": 88740 - }, - { - "epoch": 0.7845789352711328, - "grad_norm": 13.516206741333008, - "learning_rate": 3.692368441214779e-05, - "loss": 0.6049, - "step": 88750 - }, - { - "epoch": 0.784667338531445, - "grad_norm": 2.7559354305267334, - "learning_rate": 3.692221102447592e-05, - "loss": 0.6337, - "step": 88760 - }, - { - "epoch": 0.7847557417917573, - "grad_norm": 2.1681416034698486, - "learning_rate": 3.692073763680405e-05, - "loss": 0.8782, - "step": 88770 - }, - { - "epoch": 0.7848441450520696, - "grad_norm": 4.1809916496276855, - "learning_rate": 3.6919264249132175e-05, - "loss": 0.6429, - "step": 88780 - }, - { - "epoch": 0.7849325483123818, - "grad_norm": 2.3515563011169434, - "learning_rate": 3.6917790861460304e-05, - "loss": 0.6666, - "step": 88790 - }, - { - "epoch": 0.785020951572694, - "grad_norm": 1.1446459293365479, - "learning_rate": 3.691631747378844e-05, - "loss": 0.685, - "step": 88800 - }, - { - "epoch": 0.7851093548330063, - "grad_norm": 0.7936797738075256, - "learning_rate": 3.691484408611656e-05, - "loss": 0.6952, - "step": 88810 - }, - { - "epoch": 0.7851977580933185, - "grad_norm": 5.4908013343811035, - "learning_rate": 3.6913370698444696e-05, - "loss": 0.6882, - "step": 88820 - }, - { - "epoch": 0.7852861613536307, - "grad_norm": 7.077978134155273, - "learning_rate": 3.6911897310772824e-05, - "loss": 0.7397, - "step": 88830 - }, - { - "epoch": 0.785374564613943, - "grad_norm": 3.20505952835083, - "learning_rate": 3.691042392310095e-05, - "loss": 0.7053, - "step": 88840 - }, - { - "epoch": 0.7854629678742552, - "grad_norm": 7.291604042053223, - "learning_rate": 3.690895053542908e-05, - "loss": 0.8036, - "step": 88850 - }, - { - "epoch": 0.7855513711345674, - "grad_norm": 4.67714786529541, - "learning_rate": 3.6907477147757216e-05, - "loss": 0.7113, - "step": 88860 - }, - { - "epoch": 0.7856397743948796, - "grad_norm": 2.403384208679199, - "learning_rate": 3.690600376008534e-05, - "loss": 0.633, - "step": 88870 - }, - { - "epoch": 0.7857281776551919, - "grad_norm": 3.3825719356536865, - "learning_rate": 3.690453037241347e-05, - "loss": 0.7011, - "step": 88880 - }, - { - "epoch": 0.7858165809155042, - "grad_norm": 3.1293861865997314, - "learning_rate": 3.6903056984741594e-05, - "loss": 0.6551, - "step": 88890 - }, - { - "epoch": 0.7859049841758164, - "grad_norm": 7.326724529266357, - "learning_rate": 3.690158359706973e-05, - "loss": 0.5545, - "step": 88900 - }, - { - "epoch": 0.7859933874361287, - "grad_norm": 2.852421760559082, - "learning_rate": 3.690011020939786e-05, - "loss": 0.719, - "step": 88910 - }, - { - "epoch": 0.7860817906964409, - "grad_norm": 7.667893409729004, - "learning_rate": 3.6898636821725986e-05, - "loss": 0.6199, - "step": 88920 - }, - { - "epoch": 0.7861701939567531, - "grad_norm": 2.5921435356140137, - "learning_rate": 3.6897163434054114e-05, - "loss": 0.6916, - "step": 88930 - }, - { - "epoch": 0.7862585972170654, - "grad_norm": 1.0341322422027588, - "learning_rate": 3.689569004638225e-05, - "loss": 0.6539, - "step": 88940 - }, - { - "epoch": 0.7863470004773776, - "grad_norm": 7.7420573234558105, - "learning_rate": 3.689421665871037e-05, - "loss": 0.7001, - "step": 88950 - }, - { - "epoch": 0.7864354037376898, - "grad_norm": 5.026370525360107, - "learning_rate": 3.6892743271038506e-05, - "loss": 0.7464, - "step": 88960 - }, - { - "epoch": 0.7865238069980021, - "grad_norm": 2.243091583251953, - "learning_rate": 3.6891269883366634e-05, - "loss": 0.6455, - "step": 88970 - }, - { - "epoch": 0.7866122102583143, - "grad_norm": 4.696250915527344, - "learning_rate": 3.688979649569476e-05, - "loss": 0.577, - "step": 88980 - }, - { - "epoch": 0.7867006135186265, - "grad_norm": 11.603858947753906, - "learning_rate": 3.688832310802289e-05, - "loss": 0.5918, - "step": 88990 - }, - { - "epoch": 0.7867890167789388, - "grad_norm": 4.928067207336426, - "learning_rate": 3.6886849720351026e-05, - "loss": 0.7424, - "step": 89000 - }, - { - "epoch": 0.7868774200392511, - "grad_norm": 2.0284087657928467, - "learning_rate": 3.688537633267915e-05, - "loss": 0.5957, - "step": 89010 - }, - { - "epoch": 0.7869658232995633, - "grad_norm": 3.126385450363159, - "learning_rate": 3.688390294500728e-05, - "loss": 0.6453, - "step": 89020 - }, - { - "epoch": 0.7870542265598756, - "grad_norm": 2.6042163372039795, - "learning_rate": 3.6882429557335405e-05, - "loss": 0.5499, - "step": 89030 - }, - { - "epoch": 0.7871426298201878, - "grad_norm": 4.190433502197266, - "learning_rate": 3.688095616966354e-05, - "loss": 0.6625, - "step": 89040 - }, - { - "epoch": 0.7872310330805, - "grad_norm": 2.73695707321167, - "learning_rate": 3.687948278199167e-05, - "loss": 0.6045, - "step": 89050 - }, - { - "epoch": 0.7873194363408123, - "grad_norm": 3.515976905822754, - "learning_rate": 3.6878009394319796e-05, - "loss": 0.6021, - "step": 89060 - }, - { - "epoch": 0.7874078396011245, - "grad_norm": 2.2931578159332275, - "learning_rate": 3.6876536006647925e-05, - "loss": 0.5635, - "step": 89070 - }, - { - "epoch": 0.7874962428614367, - "grad_norm": 3.415226459503174, - "learning_rate": 3.687506261897606e-05, - "loss": 0.6612, - "step": 89080 - }, - { - "epoch": 0.7875846461217489, - "grad_norm": 6.145545482635498, - "learning_rate": 3.687358923130418e-05, - "loss": 0.6797, - "step": 89090 - }, - { - "epoch": 0.7876730493820612, - "grad_norm": 6.924167633056641, - "learning_rate": 3.6872115843632317e-05, - "loss": 0.8244, - "step": 89100 - }, - { - "epoch": 0.7877614526423734, - "grad_norm": 3.182163953781128, - "learning_rate": 3.6870642455960445e-05, - "loss": 0.6204, - "step": 89110 - }, - { - "epoch": 0.7878498559026856, - "grad_norm": 12.460630416870117, - "learning_rate": 3.686916906828857e-05, - "loss": 0.5892, - "step": 89120 - }, - { - "epoch": 0.787938259162998, - "grad_norm": 8.036012649536133, - "learning_rate": 3.68676956806167e-05, - "loss": 0.6687, - "step": 89130 - }, - { - "epoch": 0.7880266624233102, - "grad_norm": 3.1527159214019775, - "learning_rate": 3.686622229294483e-05, - "loss": 0.73, - "step": 89140 - }, - { - "epoch": 0.7881150656836224, - "grad_norm": 2.6475212574005127, - "learning_rate": 3.686474890527296e-05, - "loss": 0.5661, - "step": 89150 - }, - { - "epoch": 0.7882034689439347, - "grad_norm": 1.0636621713638306, - "learning_rate": 3.6863275517601093e-05, - "loss": 0.5466, - "step": 89160 - }, - { - "epoch": 0.7882918722042469, - "grad_norm": 5.516800880432129, - "learning_rate": 3.6861802129929215e-05, - "loss": 0.6586, - "step": 89170 - }, - { - "epoch": 0.7883802754645591, - "grad_norm": 2.99794340133667, - "learning_rate": 3.686032874225735e-05, - "loss": 0.735, - "step": 89180 - }, - { - "epoch": 0.7884686787248714, - "grad_norm": 5.132623195648193, - "learning_rate": 3.685885535458548e-05, - "loss": 0.626, - "step": 89190 - }, - { - "epoch": 0.7885570819851836, - "grad_norm": 7.003590106964111, - "learning_rate": 3.685738196691361e-05, - "loss": 0.7468, - "step": 89200 - }, - { - "epoch": 0.7886454852454958, - "grad_norm": 3.0408949851989746, - "learning_rate": 3.6855908579241735e-05, - "loss": 0.5835, - "step": 89210 - }, - { - "epoch": 0.7887338885058081, - "grad_norm": 4.65286922454834, - "learning_rate": 3.685443519156987e-05, - "loss": 0.7545, - "step": 89220 - }, - { - "epoch": 0.7888222917661203, - "grad_norm": 7.724262237548828, - "learning_rate": 3.685296180389799e-05, - "loss": 0.7402, - "step": 89230 - }, - { - "epoch": 0.7889106950264326, - "grad_norm": 5.497486114501953, - "learning_rate": 3.685148841622613e-05, - "loss": 0.66, - "step": 89240 - }, - { - "epoch": 0.7889990982867449, - "grad_norm": 1.7779713869094849, - "learning_rate": 3.6850015028554255e-05, - "loss": 0.616, - "step": 89250 - }, - { - "epoch": 0.7890875015470571, - "grad_norm": 10.849710464477539, - "learning_rate": 3.6848541640882384e-05, - "loss": 0.5861, - "step": 89260 - }, - { - "epoch": 0.7891759048073693, - "grad_norm": 16.75411605834961, - "learning_rate": 3.684706825321051e-05, - "loss": 0.7508, - "step": 89270 - }, - { - "epoch": 0.7892643080676816, - "grad_norm": 8.728506088256836, - "learning_rate": 3.684559486553864e-05, - "loss": 0.6679, - "step": 89280 - }, - { - "epoch": 0.7893527113279938, - "grad_norm": 3.0433952808380127, - "learning_rate": 3.684412147786677e-05, - "loss": 0.7144, - "step": 89290 - }, - { - "epoch": 0.789441114588306, - "grad_norm": 8.602217674255371, - "learning_rate": 3.6842648090194904e-05, - "loss": 0.6023, - "step": 89300 - }, - { - "epoch": 0.7895295178486182, - "grad_norm": 1.4041599035263062, - "learning_rate": 3.684117470252303e-05, - "loss": 0.7798, - "step": 89310 - }, - { - "epoch": 0.7896179211089305, - "grad_norm": 0.9814836978912354, - "learning_rate": 3.683970131485116e-05, - "loss": 0.7293, - "step": 89320 - }, - { - "epoch": 0.7897063243692427, - "grad_norm": 2.3014960289001465, - "learning_rate": 3.683822792717929e-05, - "loss": 0.7899, - "step": 89330 - }, - { - "epoch": 0.7897947276295549, - "grad_norm": 3.170083999633789, - "learning_rate": 3.683675453950742e-05, - "loss": 0.6036, - "step": 89340 - }, - { - "epoch": 0.7898831308898672, - "grad_norm": 3.452230453491211, - "learning_rate": 3.6835281151835546e-05, - "loss": 0.5871, - "step": 89350 - }, - { - "epoch": 0.7899715341501795, - "grad_norm": 7.357548713684082, - "learning_rate": 3.6833807764163674e-05, - "loss": 0.6071, - "step": 89360 - }, - { - "epoch": 0.7900599374104917, - "grad_norm": 1.5751733779907227, - "learning_rate": 3.683233437649181e-05, - "loss": 0.626, - "step": 89370 - }, - { - "epoch": 0.790148340670804, - "grad_norm": 2.1227712631225586, - "learning_rate": 3.683086098881994e-05, - "loss": 0.7323, - "step": 89380 - }, - { - "epoch": 0.7902367439311162, - "grad_norm": 10.857606887817383, - "learning_rate": 3.6829387601148066e-05, - "loss": 0.7407, - "step": 89390 - }, - { - "epoch": 0.7903251471914284, - "grad_norm": 2.5266811847686768, - "learning_rate": 3.6827914213476194e-05, - "loss": 0.5984, - "step": 89400 - }, - { - "epoch": 0.7904135504517407, - "grad_norm": 2.156224250793457, - "learning_rate": 3.682644082580432e-05, - "loss": 0.6688, - "step": 89410 - }, - { - "epoch": 0.7905019537120529, - "grad_norm": 1.744645357131958, - "learning_rate": 3.682496743813245e-05, - "loss": 0.7207, - "step": 89420 - }, - { - "epoch": 0.7905903569723651, - "grad_norm": 1.5828803777694702, - "learning_rate": 3.6823494050460586e-05, - "loss": 0.6678, - "step": 89430 - }, - { - "epoch": 0.7906787602326774, - "grad_norm": 5.530209064483643, - "learning_rate": 3.6822020662788714e-05, - "loss": 0.6763, - "step": 89440 - }, - { - "epoch": 0.7907671634929896, - "grad_norm": 0.8541598320007324, - "learning_rate": 3.682054727511684e-05, - "loss": 0.6077, - "step": 89450 - }, - { - "epoch": 0.7908555667533018, - "grad_norm": 2.934976816177368, - "learning_rate": 3.681907388744497e-05, - "loss": 0.6337, - "step": 89460 - }, - { - "epoch": 0.790943970013614, - "grad_norm": 1.8297994136810303, - "learning_rate": 3.68176004997731e-05, - "loss": 0.5861, - "step": 89470 - }, - { - "epoch": 0.7910323732739264, - "grad_norm": 1.726386547088623, - "learning_rate": 3.681612711210123e-05, - "loss": 0.62, - "step": 89480 - }, - { - "epoch": 0.7911207765342386, - "grad_norm": 1.0238670110702515, - "learning_rate": 3.681465372442936e-05, - "loss": 0.7597, - "step": 89490 - }, - { - "epoch": 0.7912091797945509, - "grad_norm": 4.956290245056152, - "learning_rate": 3.6813180336757485e-05, - "loss": 0.6482, - "step": 89500 - }, - { - "epoch": 0.7912975830548631, - "grad_norm": 6.393110275268555, - "learning_rate": 3.681170694908562e-05, - "loss": 0.6254, - "step": 89510 - }, - { - "epoch": 0.7913859863151753, - "grad_norm": 2.4836957454681396, - "learning_rate": 3.681023356141375e-05, - "loss": 0.6302, - "step": 89520 - }, - { - "epoch": 0.7914743895754875, - "grad_norm": 13.077306747436523, - "learning_rate": 3.6808760173741876e-05, - "loss": 0.6459, - "step": 89530 - }, - { - "epoch": 0.7915627928357998, - "grad_norm": 2.5070433616638184, - "learning_rate": 3.6807286786070005e-05, - "loss": 0.6984, - "step": 89540 - }, - { - "epoch": 0.791651196096112, - "grad_norm": 7.889594554901123, - "learning_rate": 3.680581339839814e-05, - "loss": 0.6318, - "step": 89550 - }, - { - "epoch": 0.7917395993564242, - "grad_norm": 3.2554988861083984, - "learning_rate": 3.680434001072626e-05, - "loss": 0.6755, - "step": 89560 - }, - { - "epoch": 0.7918280026167365, - "grad_norm": 1.702609658241272, - "learning_rate": 3.68028666230544e-05, - "loss": 0.6503, - "step": 89570 - }, - { - "epoch": 0.7919164058770487, - "grad_norm": 1.5688401460647583, - "learning_rate": 3.6801393235382525e-05, - "loss": 0.5643, - "step": 89580 - }, - { - "epoch": 0.7920048091373609, - "grad_norm": 2.11912202835083, - "learning_rate": 3.679991984771065e-05, - "loss": 0.6708, - "step": 89590 - }, - { - "epoch": 0.7920932123976733, - "grad_norm": 2.4789113998413086, - "learning_rate": 3.679844646003878e-05, - "loss": 0.7685, - "step": 89600 - }, - { - "epoch": 0.7921816156579855, - "grad_norm": 1.4530085325241089, - "learning_rate": 3.679697307236691e-05, - "loss": 0.591, - "step": 89610 - }, - { - "epoch": 0.7922700189182977, - "grad_norm": 2.6055314540863037, - "learning_rate": 3.679549968469504e-05, - "loss": 0.6398, - "step": 89620 - }, - { - "epoch": 0.79235842217861, - "grad_norm": 2.4989185333251953, - "learning_rate": 3.6794026297023174e-05, - "loss": 0.5998, - "step": 89630 - }, - { - "epoch": 0.7924468254389222, - "grad_norm": 1.8193473815917969, - "learning_rate": 3.6792552909351295e-05, - "loss": 0.6934, - "step": 89640 - }, - { - "epoch": 0.7925352286992344, - "grad_norm": 5.6462931632995605, - "learning_rate": 3.679107952167943e-05, - "loss": 0.6782, - "step": 89650 - }, - { - "epoch": 0.7926236319595467, - "grad_norm": 7.5111870765686035, - "learning_rate": 3.678960613400756e-05, - "loss": 0.7243, - "step": 89660 - }, - { - "epoch": 0.7927120352198589, - "grad_norm": 1.2621877193450928, - "learning_rate": 3.678813274633569e-05, - "loss": 0.684, - "step": 89670 - }, - { - "epoch": 0.7928004384801711, - "grad_norm": 2.9011409282684326, - "learning_rate": 3.6786659358663815e-05, - "loss": 0.7346, - "step": 89680 - }, - { - "epoch": 0.7928888417404834, - "grad_norm": 6.006600379943848, - "learning_rate": 3.678518597099195e-05, - "loss": 0.7034, - "step": 89690 - }, - { - "epoch": 0.7929772450007956, - "grad_norm": 1.6063816547393799, - "learning_rate": 3.678371258332007e-05, - "loss": 0.6455, - "step": 89700 - }, - { - "epoch": 0.7930656482611079, - "grad_norm": 3.9927990436553955, - "learning_rate": 3.678223919564821e-05, - "loss": 0.7919, - "step": 89710 - }, - { - "epoch": 0.7931540515214202, - "grad_norm": 5.710406303405762, - "learning_rate": 3.678076580797633e-05, - "loss": 0.7795, - "step": 89720 - }, - { - "epoch": 0.7932424547817324, - "grad_norm": 1.9698961973190308, - "learning_rate": 3.6779292420304464e-05, - "loss": 0.6593, - "step": 89730 - }, - { - "epoch": 0.7933308580420446, - "grad_norm": 1.3413444757461548, - "learning_rate": 3.677781903263259e-05, - "loss": 0.7236, - "step": 89740 - }, - { - "epoch": 0.7934192613023568, - "grad_norm": 3.25018048286438, - "learning_rate": 3.677634564496072e-05, - "loss": 0.6375, - "step": 89750 - }, - { - "epoch": 0.7935076645626691, - "grad_norm": 1.7060281038284302, - "learning_rate": 3.677487225728885e-05, - "loss": 0.7026, - "step": 89760 - }, - { - "epoch": 0.7935960678229813, - "grad_norm": 2.2737340927124023, - "learning_rate": 3.6773398869616984e-05, - "loss": 0.5609, - "step": 89770 - }, - { - "epoch": 0.7936844710832935, - "grad_norm": 2.828493595123291, - "learning_rate": 3.6771925481945106e-05, - "loss": 0.6469, - "step": 89780 - }, - { - "epoch": 0.7937728743436058, - "grad_norm": 1.5337222814559937, - "learning_rate": 3.677045209427324e-05, - "loss": 0.7227, - "step": 89790 - }, - { - "epoch": 0.793861277603918, - "grad_norm": 2.079472303390503, - "learning_rate": 3.676897870660137e-05, - "loss": 0.7591, - "step": 89800 - }, - { - "epoch": 0.7939496808642302, - "grad_norm": 1.264919638633728, - "learning_rate": 3.67675053189295e-05, - "loss": 0.6199, - "step": 89810 - }, - { - "epoch": 0.7940380841245425, - "grad_norm": 3.484257221221924, - "learning_rate": 3.6766031931257626e-05, - "loss": 0.7112, - "step": 89820 - }, - { - "epoch": 0.7941264873848548, - "grad_norm": 6.181394577026367, - "learning_rate": 3.6764558543585754e-05, - "loss": 0.7115, - "step": 89830 - }, - { - "epoch": 0.794214890645167, - "grad_norm": 2.282637119293213, - "learning_rate": 3.676308515591388e-05, - "loss": 0.6525, - "step": 89840 - }, - { - "epoch": 0.7943032939054793, - "grad_norm": 5.451003551483154, - "learning_rate": 3.676161176824202e-05, - "loss": 0.5581, - "step": 89850 - }, - { - "epoch": 0.7943916971657915, - "grad_norm": 5.316173076629639, - "learning_rate": 3.676013838057014e-05, - "loss": 0.7072, - "step": 89860 - }, - { - "epoch": 0.7944801004261037, - "grad_norm": 2.9746615886688232, - "learning_rate": 3.6758664992898274e-05, - "loss": 0.807, - "step": 89870 - }, - { - "epoch": 0.794568503686416, - "grad_norm": 2.413289785385132, - "learning_rate": 3.67571916052264e-05, - "loss": 0.6598, - "step": 89880 - }, - { - "epoch": 0.7946569069467282, - "grad_norm": 7.571915626525879, - "learning_rate": 3.675571821755453e-05, - "loss": 0.6777, - "step": 89890 - }, - { - "epoch": 0.7947453102070404, - "grad_norm": 1.52367103099823, - "learning_rate": 3.675424482988266e-05, - "loss": 0.6388, - "step": 89900 - }, - { - "epoch": 0.7948337134673527, - "grad_norm": 11.09347915649414, - "learning_rate": 3.6752771442210795e-05, - "loss": 0.5975, - "step": 89910 - }, - { - "epoch": 0.7949221167276649, - "grad_norm": 3.8028767108917236, - "learning_rate": 3.6751298054538916e-05, - "loss": 0.6116, - "step": 89920 - }, - { - "epoch": 0.7950105199879771, - "grad_norm": 3.759174108505249, - "learning_rate": 3.674982466686705e-05, - "loss": 0.5797, - "step": 89930 - }, - { - "epoch": 0.7950989232482893, - "grad_norm": 3.9057390689849854, - "learning_rate": 3.674835127919518e-05, - "loss": 0.8096, - "step": 89940 - }, - { - "epoch": 0.7951873265086017, - "grad_norm": 2.45003342628479, - "learning_rate": 3.674687789152331e-05, - "loss": 0.6402, - "step": 89950 - }, - { - "epoch": 0.7952757297689139, - "grad_norm": 1.45388925075531, - "learning_rate": 3.6745404503851436e-05, - "loss": 0.6921, - "step": 89960 - }, - { - "epoch": 0.7953641330292262, - "grad_norm": 2.3963100910186768, - "learning_rate": 3.6743931116179565e-05, - "loss": 0.5433, - "step": 89970 - }, - { - "epoch": 0.7954525362895384, - "grad_norm": 2.329308032989502, - "learning_rate": 3.674245772850769e-05, - "loss": 0.6011, - "step": 89980 - }, - { - "epoch": 0.7955409395498506, - "grad_norm": 3.735215663909912, - "learning_rate": 3.674098434083583e-05, - "loss": 0.727, - "step": 89990 - }, - { - "epoch": 0.7956293428101628, - "grad_norm": 5.208836078643799, - "learning_rate": 3.673951095316395e-05, - "loss": 0.6039, - "step": 90000 - }, - { - "epoch": 0.7957177460704751, - "grad_norm": 3.0822675228118896, - "learning_rate": 3.6738037565492085e-05, - "loss": 0.6221, - "step": 90010 - }, - { - "epoch": 0.7958061493307873, - "grad_norm": 1.4857887029647827, - "learning_rate": 3.673656417782021e-05, - "loss": 0.7058, - "step": 90020 - }, - { - "epoch": 0.7958945525910995, - "grad_norm": 12.714051246643066, - "learning_rate": 3.673509079014834e-05, - "loss": 0.603, - "step": 90030 - }, - { - "epoch": 0.7959829558514118, - "grad_norm": 6.570509910583496, - "learning_rate": 3.673361740247647e-05, - "loss": 0.6961, - "step": 90040 - }, - { - "epoch": 0.796071359111724, - "grad_norm": 1.5676804780960083, - "learning_rate": 3.6732144014804605e-05, - "loss": 0.73, - "step": 90050 - }, - { - "epoch": 0.7961597623720362, - "grad_norm": 1.061217188835144, - "learning_rate": 3.673067062713273e-05, - "loss": 0.7439, - "step": 90060 - }, - { - "epoch": 0.7962481656323486, - "grad_norm": 6.577081680297852, - "learning_rate": 3.672919723946086e-05, - "loss": 0.8206, - "step": 90070 - }, - { - "epoch": 0.7963365688926608, - "grad_norm": 1.5707778930664062, - "learning_rate": 3.672772385178898e-05, - "loss": 0.5718, - "step": 90080 - }, - { - "epoch": 0.796424972152973, - "grad_norm": 6.731858253479004, - "learning_rate": 3.672625046411712e-05, - "loss": 0.6494, - "step": 90090 - }, - { - "epoch": 0.7965133754132853, - "grad_norm": 3.61200213432312, - "learning_rate": 3.672477707644525e-05, - "loss": 0.7392, - "step": 90100 - }, - { - "epoch": 0.7966017786735975, - "grad_norm": 1.8012754917144775, - "learning_rate": 3.6723303688773375e-05, - "loss": 0.6165, - "step": 90110 - }, - { - "epoch": 0.7966901819339097, - "grad_norm": 6.126452445983887, - "learning_rate": 3.6721830301101504e-05, - "loss": 0.7815, - "step": 90120 - }, - { - "epoch": 0.796778585194222, - "grad_norm": 10.822237014770508, - "learning_rate": 3.672035691342964e-05, - "loss": 0.7079, - "step": 90130 - }, - { - "epoch": 0.7968669884545342, - "grad_norm": 9.062956809997559, - "learning_rate": 3.671888352575776e-05, - "loss": 0.6992, - "step": 90140 - }, - { - "epoch": 0.7969553917148464, - "grad_norm": 8.386719703674316, - "learning_rate": 3.6717410138085895e-05, - "loss": 0.5561, - "step": 90150 - }, - { - "epoch": 0.7970437949751586, - "grad_norm": 11.70284652709961, - "learning_rate": 3.6715936750414024e-05, - "loss": 0.6831, - "step": 90160 - }, - { - "epoch": 0.7971321982354709, - "grad_norm": 0.9447895884513855, - "learning_rate": 3.671446336274215e-05, - "loss": 0.6646, - "step": 90170 - }, - { - "epoch": 0.7972206014957831, - "grad_norm": 5.494142532348633, - "learning_rate": 3.671298997507028e-05, - "loss": 0.5407, - "step": 90180 - }, - { - "epoch": 0.7973090047560955, - "grad_norm": 8.045598030090332, - "learning_rate": 3.671151658739841e-05, - "loss": 0.8086, - "step": 90190 - }, - { - "epoch": 0.7973974080164077, - "grad_norm": 3.0696487426757812, - "learning_rate": 3.671004319972654e-05, - "loss": 0.6195, - "step": 90200 - }, - { - "epoch": 0.7974858112767199, - "grad_norm": 1.5057857036590576, - "learning_rate": 3.670856981205467e-05, - "loss": 0.6232, - "step": 90210 - }, - { - "epoch": 0.7975742145370321, - "grad_norm": 9.587413787841797, - "learning_rate": 3.67070964243828e-05, - "loss": 0.6251, - "step": 90220 - }, - { - "epoch": 0.7976626177973444, - "grad_norm": 3.105311870574951, - "learning_rate": 3.670562303671093e-05, - "loss": 0.7872, - "step": 90230 - }, - { - "epoch": 0.7977510210576566, - "grad_norm": 2.6679515838623047, - "learning_rate": 3.670414964903906e-05, - "loss": 0.7538, - "step": 90240 - }, - { - "epoch": 0.7978394243179688, - "grad_norm": 9.960758209228516, - "learning_rate": 3.6702676261367186e-05, - "loss": 0.7526, - "step": 90250 - }, - { - "epoch": 0.7979278275782811, - "grad_norm": 1.8704094886779785, - "learning_rate": 3.6701202873695314e-05, - "loss": 0.5858, - "step": 90260 - }, - { - "epoch": 0.7980162308385933, - "grad_norm": 5.982682228088379, - "learning_rate": 3.669972948602345e-05, - "loss": 0.5648, - "step": 90270 - }, - { - "epoch": 0.7981046340989055, - "grad_norm": 5.4588303565979, - "learning_rate": 3.669825609835158e-05, - "loss": 0.6909, - "step": 90280 - }, - { - "epoch": 0.7981930373592178, - "grad_norm": 2.4680209159851074, - "learning_rate": 3.6696782710679706e-05, - "loss": 0.5043, - "step": 90290 - }, - { - "epoch": 0.7982814406195301, - "grad_norm": 0.5643435716629028, - "learning_rate": 3.6695309323007834e-05, - "loss": 0.7279, - "step": 90300 - }, - { - "epoch": 0.7983698438798423, - "grad_norm": 1.5978022813796997, - "learning_rate": 3.669383593533596e-05, - "loss": 0.6806, - "step": 90310 - }, - { - "epoch": 0.7984582471401546, - "grad_norm": 2.233640432357788, - "learning_rate": 3.669236254766409e-05, - "loss": 0.5723, - "step": 90320 - }, - { - "epoch": 0.7985466504004668, - "grad_norm": 3.4113526344299316, - "learning_rate": 3.669088915999222e-05, - "loss": 0.5339, - "step": 90330 - }, - { - "epoch": 0.798635053660779, - "grad_norm": 2.4243462085723877, - "learning_rate": 3.6689415772320354e-05, - "loss": 0.5339, - "step": 90340 - }, - { - "epoch": 0.7987234569210913, - "grad_norm": 2.112652540206909, - "learning_rate": 3.668794238464848e-05, - "loss": 0.6625, - "step": 90350 - }, - { - "epoch": 0.7988118601814035, - "grad_norm": 1.8202285766601562, - "learning_rate": 3.668646899697661e-05, - "loss": 0.5857, - "step": 90360 - }, - { - "epoch": 0.7989002634417157, - "grad_norm": 3.372126817703247, - "learning_rate": 3.668499560930474e-05, - "loss": 0.6101, - "step": 90370 - }, - { - "epoch": 0.798988666702028, - "grad_norm": 1.6648164987564087, - "learning_rate": 3.668352222163287e-05, - "loss": 0.5972, - "step": 90380 - }, - { - "epoch": 0.7990770699623402, - "grad_norm": 3.1647562980651855, - "learning_rate": 3.6682048833960996e-05, - "loss": 0.6722, - "step": 90390 - }, - { - "epoch": 0.7991654732226524, - "grad_norm": 3.588517904281616, - "learning_rate": 3.668057544628913e-05, - "loss": 0.6789, - "step": 90400 - }, - { - "epoch": 0.7992538764829646, - "grad_norm": 3.194951295852661, - "learning_rate": 3.667910205861726e-05, - "loss": 0.6922, - "step": 90410 - }, - { - "epoch": 0.799342279743277, - "grad_norm": 1.9487104415893555, - "learning_rate": 3.667762867094539e-05, - "loss": 0.6304, - "step": 90420 - }, - { - "epoch": 0.7994306830035892, - "grad_norm": 18.30216407775879, - "learning_rate": 3.6676155283273516e-05, - "loss": 0.7401, - "step": 90430 - }, - { - "epoch": 0.7995190862639014, - "grad_norm": 21.167621612548828, - "learning_rate": 3.6674681895601645e-05, - "loss": 0.6146, - "step": 90440 - }, - { - "epoch": 0.7996074895242137, - "grad_norm": 6.47659969329834, - "learning_rate": 3.667320850792977e-05, - "loss": 0.6854, - "step": 90450 - }, - { - "epoch": 0.7996958927845259, - "grad_norm": 4.811198711395264, - "learning_rate": 3.667173512025791e-05, - "loss": 0.6042, - "step": 90460 - }, - { - "epoch": 0.7997842960448381, - "grad_norm": 6.630556106567383, - "learning_rate": 3.667026173258603e-05, - "loss": 0.7453, - "step": 90470 - }, - { - "epoch": 0.7998726993051504, - "grad_norm": 1.1571115255355835, - "learning_rate": 3.6668788344914165e-05, - "loss": 0.5701, - "step": 90480 - }, - { - "epoch": 0.7999611025654626, - "grad_norm": 3.7846672534942627, - "learning_rate": 3.666731495724229e-05, - "loss": 0.6055, - "step": 90490 - }, - { - "epoch": 0.8000495058257748, - "grad_norm": 1.8373130559921265, - "learning_rate": 3.666584156957042e-05, - "loss": 0.5539, - "step": 90500 - }, - { - "epoch": 0.8001379090860871, - "grad_norm": 2.4086570739746094, - "learning_rate": 3.666436818189855e-05, - "loss": 0.6487, - "step": 90510 - }, - { - "epoch": 0.8002263123463993, - "grad_norm": 8.181065559387207, - "learning_rate": 3.6662894794226685e-05, - "loss": 0.7604, - "step": 90520 - }, - { - "epoch": 0.8003147156067115, - "grad_norm": 2.988612413406372, - "learning_rate": 3.666142140655481e-05, - "loss": 0.5591, - "step": 90530 - }, - { - "epoch": 0.8004031188670239, - "grad_norm": 5.187861919403076, - "learning_rate": 3.665994801888294e-05, - "loss": 0.5942, - "step": 90540 - }, - { - "epoch": 0.8004915221273361, - "grad_norm": 6.124841690063477, - "learning_rate": 3.6658474631211063e-05, - "loss": 0.7474, - "step": 90550 - }, - { - "epoch": 0.8005799253876483, - "grad_norm": 5.242320537567139, - "learning_rate": 3.66570012435392e-05, - "loss": 0.6616, - "step": 90560 - }, - { - "epoch": 0.8006683286479606, - "grad_norm": 3.7027320861816406, - "learning_rate": 3.665552785586733e-05, - "loss": 0.5237, - "step": 90570 - }, - { - "epoch": 0.8007567319082728, - "grad_norm": 3.004584789276123, - "learning_rate": 3.6654054468195455e-05, - "loss": 0.7868, - "step": 90580 - }, - { - "epoch": 0.800845135168585, - "grad_norm": 9.394257545471191, - "learning_rate": 3.6652581080523584e-05, - "loss": 0.76, - "step": 90590 - }, - { - "epoch": 0.8009335384288973, - "grad_norm": 9.192709922790527, - "learning_rate": 3.665110769285172e-05, - "loss": 0.6656, - "step": 90600 - }, - { - "epoch": 0.8010219416892095, - "grad_norm": 5.200221061706543, - "learning_rate": 3.664963430517984e-05, - "loss": 0.6364, - "step": 90610 - }, - { - "epoch": 0.8011103449495217, - "grad_norm": 6.382055759429932, - "learning_rate": 3.6648160917507975e-05, - "loss": 0.7844, - "step": 90620 - }, - { - "epoch": 0.8011987482098339, - "grad_norm": 5.1465535163879395, - "learning_rate": 3.6646687529836104e-05, - "loss": 0.6745, - "step": 90630 - }, - { - "epoch": 0.8012871514701462, - "grad_norm": 3.387704610824585, - "learning_rate": 3.664521414216423e-05, - "loss": 0.6639, - "step": 90640 - }, - { - "epoch": 0.8013755547304584, - "grad_norm": 2.0622878074645996, - "learning_rate": 3.664374075449236e-05, - "loss": 0.7891, - "step": 90650 - }, - { - "epoch": 0.8014639579907707, - "grad_norm": 4.627909183502197, - "learning_rate": 3.664226736682049e-05, - "loss": 0.799, - "step": 90660 - }, - { - "epoch": 0.801552361251083, - "grad_norm": 2.908628463745117, - "learning_rate": 3.664079397914862e-05, - "loss": 0.7835, - "step": 90670 - }, - { - "epoch": 0.8016407645113952, - "grad_norm": 4.224820137023926, - "learning_rate": 3.663932059147675e-05, - "loss": 0.6483, - "step": 90680 - }, - { - "epoch": 0.8017291677717074, - "grad_norm": 3.7172868251800537, - "learning_rate": 3.6637847203804874e-05, - "loss": 0.6258, - "step": 90690 - }, - { - "epoch": 0.8018175710320197, - "grad_norm": 2.3695993423461914, - "learning_rate": 3.663637381613301e-05, - "loss": 0.6825, - "step": 90700 - }, - { - "epoch": 0.8019059742923319, - "grad_norm": 2.419285774230957, - "learning_rate": 3.663490042846114e-05, - "loss": 0.6306, - "step": 90710 - }, - { - "epoch": 0.8019943775526441, - "grad_norm": 2.4181325435638428, - "learning_rate": 3.6633427040789266e-05, - "loss": 0.6776, - "step": 90720 - }, - { - "epoch": 0.8020827808129564, - "grad_norm": 3.598076343536377, - "learning_rate": 3.6631953653117394e-05, - "loss": 0.6597, - "step": 90730 - }, - { - "epoch": 0.8021711840732686, - "grad_norm": 4.75786018371582, - "learning_rate": 3.663048026544553e-05, - "loss": 0.7046, - "step": 90740 - }, - { - "epoch": 0.8022595873335808, - "grad_norm": 2.5627851486206055, - "learning_rate": 3.662900687777365e-05, - "loss": 0.5065, - "step": 90750 - }, - { - "epoch": 0.8023479905938931, - "grad_norm": 1.4296832084655762, - "learning_rate": 3.6627533490101786e-05, - "loss": 0.6487, - "step": 90760 - }, - { - "epoch": 0.8024363938542053, - "grad_norm": 2.2443766593933105, - "learning_rate": 3.6626060102429914e-05, - "loss": 0.8873, - "step": 90770 - }, - { - "epoch": 0.8025247971145176, - "grad_norm": 2.9139606952667236, - "learning_rate": 3.662458671475804e-05, - "loss": 0.6156, - "step": 90780 - }, - { - "epoch": 0.8026132003748299, - "grad_norm": 4.232553482055664, - "learning_rate": 3.662311332708617e-05, - "loss": 0.79, - "step": 90790 - }, - { - "epoch": 0.8027016036351421, - "grad_norm": 6.342467308044434, - "learning_rate": 3.66216399394143e-05, - "loss": 0.7305, - "step": 90800 - }, - { - "epoch": 0.8027900068954543, - "grad_norm": 1.851207971572876, - "learning_rate": 3.662016655174243e-05, - "loss": 0.8652, - "step": 90810 - }, - { - "epoch": 0.8028784101557666, - "grad_norm": 3.6889631748199463, - "learning_rate": 3.661869316407056e-05, - "loss": 0.6235, - "step": 90820 - }, - { - "epoch": 0.8029668134160788, - "grad_norm": 7.3890533447265625, - "learning_rate": 3.6617219776398684e-05, - "loss": 0.5818, - "step": 90830 - }, - { - "epoch": 0.803055216676391, - "grad_norm": 7.880035400390625, - "learning_rate": 3.661574638872682e-05, - "loss": 0.5788, - "step": 90840 - }, - { - "epoch": 0.8031436199367032, - "grad_norm": 4.1777825355529785, - "learning_rate": 3.661427300105495e-05, - "loss": 0.5818, - "step": 90850 - }, - { - "epoch": 0.8032320231970155, - "grad_norm": 3.1537744998931885, - "learning_rate": 3.6612799613383076e-05, - "loss": 0.7929, - "step": 90860 - }, - { - "epoch": 0.8033204264573277, - "grad_norm": 5.2961955070495605, - "learning_rate": 3.6611326225711205e-05, - "loss": 0.7781, - "step": 90870 - }, - { - "epoch": 0.8034088297176399, - "grad_norm": 2.942534923553467, - "learning_rate": 3.660985283803934e-05, - "loss": 0.7238, - "step": 90880 - }, - { - "epoch": 0.8034972329779523, - "grad_norm": 3.3210320472717285, - "learning_rate": 3.660837945036746e-05, - "loss": 0.6948, - "step": 90890 - }, - { - "epoch": 0.8035856362382645, - "grad_norm": 7.409834861755371, - "learning_rate": 3.6606906062695596e-05, - "loss": 0.6393, - "step": 90900 - }, - { - "epoch": 0.8036740394985767, - "grad_norm": 2.8655855655670166, - "learning_rate": 3.660543267502372e-05, - "loss": 0.7015, - "step": 90910 - }, - { - "epoch": 0.803762442758889, - "grad_norm": 4.531734943389893, - "learning_rate": 3.660395928735185e-05, - "loss": 0.7349, - "step": 90920 - }, - { - "epoch": 0.8038508460192012, - "grad_norm": 5.419953346252441, - "learning_rate": 3.660248589967998e-05, - "loss": 0.7656, - "step": 90930 - }, - { - "epoch": 0.8039392492795134, - "grad_norm": 1.0050759315490723, - "learning_rate": 3.660101251200811e-05, - "loss": 0.643, - "step": 90940 - }, - { - "epoch": 0.8040276525398257, - "grad_norm": 1.530034065246582, - "learning_rate": 3.659953912433624e-05, - "loss": 0.6302, - "step": 90950 - }, - { - "epoch": 0.8041160558001379, - "grad_norm": 1.8367685079574585, - "learning_rate": 3.659806573666437e-05, - "loss": 0.6528, - "step": 90960 - }, - { - "epoch": 0.8042044590604501, - "grad_norm": 2.1076557636260986, - "learning_rate": 3.6596592348992495e-05, - "loss": 0.7145, - "step": 90970 - }, - { - "epoch": 0.8042928623207624, - "grad_norm": 2.1007304191589355, - "learning_rate": 3.659511896132063e-05, - "loss": 0.6833, - "step": 90980 - }, - { - "epoch": 0.8043812655810746, - "grad_norm": 2.545356035232544, - "learning_rate": 3.659364557364876e-05, - "loss": 0.5637, - "step": 90990 - }, - { - "epoch": 0.8044696688413868, - "grad_norm": 1.8784129619598389, - "learning_rate": 3.659217218597689e-05, - "loss": 0.5453, - "step": 91000 - }, - { - "epoch": 0.8045580721016992, - "grad_norm": 6.988739013671875, - "learning_rate": 3.6590698798305015e-05, - "loss": 0.7347, - "step": 91010 - }, - { - "epoch": 0.8046464753620114, - "grad_norm": 5.475579261779785, - "learning_rate": 3.6589225410633143e-05, - "loss": 0.6541, - "step": 91020 - }, - { - "epoch": 0.8047348786223236, - "grad_norm": 4.5187907218933105, - "learning_rate": 3.658775202296127e-05, - "loss": 0.6761, - "step": 91030 - }, - { - "epoch": 0.8048232818826359, - "grad_norm": 13.180841445922852, - "learning_rate": 3.658627863528941e-05, - "loss": 0.6719, - "step": 91040 - }, - { - "epoch": 0.8049116851429481, - "grad_norm": 32.88784408569336, - "learning_rate": 3.658480524761753e-05, - "loss": 0.557, - "step": 91050 - }, - { - "epoch": 0.8050000884032603, - "grad_norm": 7.399740695953369, - "learning_rate": 3.6583331859945664e-05, - "loss": 0.6873, - "step": 91060 - }, - { - "epoch": 0.8050884916635725, - "grad_norm": 3.388075590133667, - "learning_rate": 3.658185847227379e-05, - "loss": 0.6513, - "step": 91070 - }, - { - "epoch": 0.8051768949238848, - "grad_norm": 1.3924405574798584, - "learning_rate": 3.658038508460192e-05, - "loss": 0.6953, - "step": 91080 - }, - { - "epoch": 0.805265298184197, - "grad_norm": 1.3593623638153076, - "learning_rate": 3.657891169693005e-05, - "loss": 0.6209, - "step": 91090 - }, - { - "epoch": 0.8053537014445092, - "grad_norm": 5.281068801879883, - "learning_rate": 3.6577438309258184e-05, - "loss": 0.5972, - "step": 91100 - }, - { - "epoch": 0.8054421047048215, - "grad_norm": 4.000575065612793, - "learning_rate": 3.6575964921586305e-05, - "loss": 0.5616, - "step": 91110 - }, - { - "epoch": 0.8055305079651337, - "grad_norm": 2.1831018924713135, - "learning_rate": 3.657449153391444e-05, - "loss": 0.6358, - "step": 91120 - }, - { - "epoch": 0.805618911225446, - "grad_norm": 5.4715094566345215, - "learning_rate": 3.657301814624257e-05, - "loss": 0.6813, - "step": 91130 - }, - { - "epoch": 0.8057073144857583, - "grad_norm": 1.7945505380630493, - "learning_rate": 3.65715447585707e-05, - "loss": 0.6476, - "step": 91140 - }, - { - "epoch": 0.8057957177460705, - "grad_norm": 1.485714077949524, - "learning_rate": 3.6570071370898826e-05, - "loss": 0.6773, - "step": 91150 - }, - { - "epoch": 0.8058841210063827, - "grad_norm": 2.7322723865509033, - "learning_rate": 3.6568597983226954e-05, - "loss": 0.5902, - "step": 91160 - }, - { - "epoch": 0.805972524266695, - "grad_norm": 1.9922597408294678, - "learning_rate": 3.656712459555508e-05, - "loss": 0.4839, - "step": 91170 - }, - { - "epoch": 0.8060609275270072, - "grad_norm": 4.767215251922607, - "learning_rate": 3.656565120788322e-05, - "loss": 0.6663, - "step": 91180 - }, - { - "epoch": 0.8061493307873194, - "grad_norm": 2.5317060947418213, - "learning_rate": 3.6564177820211346e-05, - "loss": 0.6364, - "step": 91190 - }, - { - "epoch": 0.8062377340476317, - "grad_norm": 1.9275096654891968, - "learning_rate": 3.6562704432539474e-05, - "loss": 0.6303, - "step": 91200 - }, - { - "epoch": 0.8063261373079439, - "grad_norm": 10.892936706542969, - "learning_rate": 3.65612310448676e-05, - "loss": 0.747, - "step": 91210 - }, - { - "epoch": 0.8064145405682561, - "grad_norm": 7.613400936126709, - "learning_rate": 3.655975765719573e-05, - "loss": 0.6131, - "step": 91220 - }, - { - "epoch": 0.8065029438285684, - "grad_norm": 1.0172770023345947, - "learning_rate": 3.655828426952386e-05, - "loss": 0.648, - "step": 91230 - }, - { - "epoch": 0.8065913470888806, - "grad_norm": 2.1202609539031982, - "learning_rate": 3.6556810881851994e-05, - "loss": 0.5708, - "step": 91240 - }, - { - "epoch": 0.8066797503491929, - "grad_norm": 0.8410158157348633, - "learning_rate": 3.655533749418012e-05, - "loss": 0.6327, - "step": 91250 - }, - { - "epoch": 0.8067681536095052, - "grad_norm": 2.0556564331054688, - "learning_rate": 3.655386410650825e-05, - "loss": 0.6523, - "step": 91260 - }, - { - "epoch": 0.8068565568698174, - "grad_norm": 5.771847248077393, - "learning_rate": 3.655239071883638e-05, - "loss": 0.6363, - "step": 91270 - }, - { - "epoch": 0.8069449601301296, - "grad_norm": 1.1063709259033203, - "learning_rate": 3.655091733116451e-05, - "loss": 0.7078, - "step": 91280 - }, - { - "epoch": 0.8070333633904418, - "grad_norm": 7.785793781280518, - "learning_rate": 3.6549443943492636e-05, - "loss": 0.7513, - "step": 91290 - }, - { - "epoch": 0.8071217666507541, - "grad_norm": 3.255139112472534, - "learning_rate": 3.6547970555820765e-05, - "loss": 0.66, - "step": 91300 - }, - { - "epoch": 0.8072101699110663, - "grad_norm": 3.1840829849243164, - "learning_rate": 3.65464971681489e-05, - "loss": 0.7693, - "step": 91310 - }, - { - "epoch": 0.8072985731713785, - "grad_norm": 7.684268951416016, - "learning_rate": 3.654502378047703e-05, - "loss": 0.5535, - "step": 91320 - }, - { - "epoch": 0.8073869764316908, - "grad_norm": 6.404549598693848, - "learning_rate": 3.6543550392805156e-05, - "loss": 0.6261, - "step": 91330 - }, - { - "epoch": 0.807475379692003, - "grad_norm": 4.003452777862549, - "learning_rate": 3.6542077005133285e-05, - "loss": 0.6514, - "step": 91340 - }, - { - "epoch": 0.8075637829523152, - "grad_norm": 4.139762878417969, - "learning_rate": 3.654060361746141e-05, - "loss": 0.6895, - "step": 91350 - }, - { - "epoch": 0.8076521862126276, - "grad_norm": 2.405508279800415, - "learning_rate": 3.653913022978954e-05, - "loss": 0.6905, - "step": 91360 - }, - { - "epoch": 0.8077405894729398, - "grad_norm": 4.928290367126465, - "learning_rate": 3.6537656842117677e-05, - "loss": 0.4836, - "step": 91370 - }, - { - "epoch": 0.807828992733252, - "grad_norm": 6.773512363433838, - "learning_rate": 3.65361834544458e-05, - "loss": 0.6862, - "step": 91380 - }, - { - "epoch": 0.8079173959935643, - "grad_norm": 5.532743453979492, - "learning_rate": 3.653471006677393e-05, - "loss": 0.5549, - "step": 91390 - }, - { - "epoch": 0.8080057992538765, - "grad_norm": 4.4739861488342285, - "learning_rate": 3.653323667910206e-05, - "loss": 0.6652, - "step": 91400 - }, - { - "epoch": 0.8080942025141887, - "grad_norm": 2.3816475868225098, - "learning_rate": 3.653176329143019e-05, - "loss": 0.6128, - "step": 91410 - }, - { - "epoch": 0.808182605774501, - "grad_norm": 4.370458126068115, - "learning_rate": 3.653028990375832e-05, - "loss": 0.7605, - "step": 91420 - }, - { - "epoch": 0.8082710090348132, - "grad_norm": 3.0469794273376465, - "learning_rate": 3.6528816516086453e-05, - "loss": 0.6975, - "step": 91430 - }, - { - "epoch": 0.8083594122951254, - "grad_norm": 4.971312999725342, - "learning_rate": 3.6527343128414575e-05, - "loss": 0.7178, - "step": 91440 - }, - { - "epoch": 0.8084478155554377, - "grad_norm": 4.512945652008057, - "learning_rate": 3.652586974074271e-05, - "loss": 0.6244, - "step": 91450 - }, - { - "epoch": 0.8085362188157499, - "grad_norm": 8.823596954345703, - "learning_rate": 3.652439635307084e-05, - "loss": 0.599, - "step": 91460 - }, - { - "epoch": 0.8086246220760621, - "grad_norm": 2.64381742477417, - "learning_rate": 3.652292296539897e-05, - "loss": 0.6012, - "step": 91470 - }, - { - "epoch": 0.8087130253363745, - "grad_norm": 1.6936637163162231, - "learning_rate": 3.6521449577727095e-05, - "loss": 0.7722, - "step": 91480 - }, - { - "epoch": 0.8088014285966867, - "grad_norm": 1.9723860025405884, - "learning_rate": 3.6519976190055224e-05, - "loss": 0.727, - "step": 91490 - }, - { - "epoch": 0.8088898318569989, - "grad_norm": 6.06514835357666, - "learning_rate": 3.651850280238335e-05, - "loss": 0.7831, - "step": 91500 - }, - { - "epoch": 0.8089782351173112, - "grad_norm": 9.091752052307129, - "learning_rate": 3.651702941471149e-05, - "loss": 0.624, - "step": 91510 - }, - { - "epoch": 0.8090666383776234, - "grad_norm": 4.952655792236328, - "learning_rate": 3.651555602703961e-05, - "loss": 0.617, - "step": 91520 - }, - { - "epoch": 0.8091550416379356, - "grad_norm": 3.2162885665893555, - "learning_rate": 3.6514082639367744e-05, - "loss": 0.6377, - "step": 91530 - }, - { - "epoch": 0.8092434448982478, - "grad_norm": 2.793762445449829, - "learning_rate": 3.651260925169587e-05, - "loss": 0.6733, - "step": 91540 - }, - { - "epoch": 0.8093318481585601, - "grad_norm": 1.3875963687896729, - "learning_rate": 3.6511135864024e-05, - "loss": 0.6158, - "step": 91550 - }, - { - "epoch": 0.8094202514188723, - "grad_norm": 1.0732028484344482, - "learning_rate": 3.650966247635213e-05, - "loss": 0.6045, - "step": 91560 - }, - { - "epoch": 0.8095086546791845, - "grad_norm": 2.06482195854187, - "learning_rate": 3.6508189088680264e-05, - "loss": 0.6835, - "step": 91570 - }, - { - "epoch": 0.8095970579394968, - "grad_norm": 8.042503356933594, - "learning_rate": 3.6506715701008386e-05, - "loss": 0.6592, - "step": 91580 - }, - { - "epoch": 0.809685461199809, - "grad_norm": 6.1628241539001465, - "learning_rate": 3.650524231333652e-05, - "loss": 0.5696, - "step": 91590 - }, - { - "epoch": 0.8097738644601213, - "grad_norm": 1.5379279851913452, - "learning_rate": 3.650376892566464e-05, - "loss": 0.6397, - "step": 91600 - }, - { - "epoch": 0.8098622677204336, - "grad_norm": 7.55742073059082, - "learning_rate": 3.650229553799278e-05, - "loss": 0.698, - "step": 91610 - }, - { - "epoch": 0.8099506709807458, - "grad_norm": 5.294787883758545, - "learning_rate": 3.6500822150320906e-05, - "loss": 0.5999, - "step": 91620 - }, - { - "epoch": 0.810039074241058, - "grad_norm": 2.8587660789489746, - "learning_rate": 3.6499348762649034e-05, - "loss": 0.6621, - "step": 91630 - }, - { - "epoch": 0.8101274775013703, - "grad_norm": 5.2149858474731445, - "learning_rate": 3.649787537497716e-05, - "loss": 0.7445, - "step": 91640 - }, - { - "epoch": 0.8102158807616825, - "grad_norm": 5.11593770980835, - "learning_rate": 3.64964019873053e-05, - "loss": 0.7587, - "step": 91650 - }, - { - "epoch": 0.8103042840219947, - "grad_norm": 9.673462867736816, - "learning_rate": 3.649492859963342e-05, - "loss": 0.7378, - "step": 91660 - }, - { - "epoch": 0.810392687282307, - "grad_norm": 3.2622766494750977, - "learning_rate": 3.6493455211961554e-05, - "loss": 0.6514, - "step": 91670 - }, - { - "epoch": 0.8104810905426192, - "grad_norm": 7.931560039520264, - "learning_rate": 3.649198182428968e-05, - "loss": 0.701, - "step": 91680 - }, - { - "epoch": 0.8105694938029314, - "grad_norm": 4.800394058227539, - "learning_rate": 3.649050843661781e-05, - "loss": 0.6736, - "step": 91690 - }, - { - "epoch": 0.8106578970632436, - "grad_norm": 1.9453575611114502, - "learning_rate": 3.648903504894594e-05, - "loss": 0.7023, - "step": 91700 - }, - { - "epoch": 0.8107463003235559, - "grad_norm": 3.5230698585510254, - "learning_rate": 3.6487561661274074e-05, - "loss": 0.6172, - "step": 91710 - }, - { - "epoch": 0.8108347035838682, - "grad_norm": 1.1256121397018433, - "learning_rate": 3.6486088273602196e-05, - "loss": 0.5613, - "step": 91720 - }, - { - "epoch": 0.8109231068441805, - "grad_norm": 1.6945984363555908, - "learning_rate": 3.648461488593033e-05, - "loss": 0.5496, - "step": 91730 - }, - { - "epoch": 0.8110115101044927, - "grad_norm": 4.023597240447998, - "learning_rate": 3.648314149825845e-05, - "loss": 0.7627, - "step": 91740 - }, - { - "epoch": 0.8110999133648049, - "grad_norm": 2.5677945613861084, - "learning_rate": 3.648166811058659e-05, - "loss": 0.7206, - "step": 91750 - }, - { - "epoch": 0.8111883166251171, - "grad_norm": 2.9123995304107666, - "learning_rate": 3.6480194722914716e-05, - "loss": 0.6682, - "step": 91760 - }, - { - "epoch": 0.8112767198854294, - "grad_norm": 2.7454440593719482, - "learning_rate": 3.6478721335242845e-05, - "loss": 0.5953, - "step": 91770 - }, - { - "epoch": 0.8113651231457416, - "grad_norm": 3.35835337638855, - "learning_rate": 3.647724794757097e-05, - "loss": 0.5792, - "step": 91780 - }, - { - "epoch": 0.8114535264060538, - "grad_norm": 8.722201347351074, - "learning_rate": 3.647577455989911e-05, - "loss": 0.6281, - "step": 91790 - }, - { - "epoch": 0.8115419296663661, - "grad_norm": 8.841720581054688, - "learning_rate": 3.647430117222723e-05, - "loss": 0.6054, - "step": 91800 - }, - { - "epoch": 0.8116303329266783, - "grad_norm": 2.318906545639038, - "learning_rate": 3.6472827784555365e-05, - "loss": 0.6452, - "step": 91810 - }, - { - "epoch": 0.8117187361869905, - "grad_norm": 6.147796154022217, - "learning_rate": 3.647135439688349e-05, - "loss": 0.6364, - "step": 91820 - }, - { - "epoch": 0.8118071394473028, - "grad_norm": 4.061427116394043, - "learning_rate": 3.646988100921162e-05, - "loss": 0.716, - "step": 91830 - }, - { - "epoch": 0.8118955427076151, - "grad_norm": 9.489065170288086, - "learning_rate": 3.646840762153975e-05, - "loss": 0.4981, - "step": 91840 - }, - { - "epoch": 0.8119839459679273, - "grad_norm": 2.0221989154815674, - "learning_rate": 3.646693423386788e-05, - "loss": 0.6535, - "step": 91850 - }, - { - "epoch": 0.8120723492282396, - "grad_norm": 6.83666467666626, - "learning_rate": 3.6465460846196007e-05, - "loss": 0.6213, - "step": 91860 - }, - { - "epoch": 0.8121607524885518, - "grad_norm": 2.6959662437438965, - "learning_rate": 3.646398745852414e-05, - "loss": 0.7453, - "step": 91870 - }, - { - "epoch": 0.812249155748864, - "grad_norm": 7.575078964233398, - "learning_rate": 3.646251407085226e-05, - "loss": 0.7979, - "step": 91880 - }, - { - "epoch": 0.8123375590091763, - "grad_norm": 2.185880661010742, - "learning_rate": 3.64610406831804e-05, - "loss": 0.6016, - "step": 91890 - }, - { - "epoch": 0.8124259622694885, - "grad_norm": 2.460078477859497, - "learning_rate": 3.645956729550853e-05, - "loss": 0.6122, - "step": 91900 - }, - { - "epoch": 0.8125143655298007, - "grad_norm": 4.931056976318359, - "learning_rate": 3.6458093907836655e-05, - "loss": 0.7672, - "step": 91910 - }, - { - "epoch": 0.812602768790113, - "grad_norm": 3.246084451675415, - "learning_rate": 3.6456620520164783e-05, - "loss": 0.6281, - "step": 91920 - }, - { - "epoch": 0.8126911720504252, - "grad_norm": 4.333129405975342, - "learning_rate": 3.645514713249292e-05, - "loss": 0.6461, - "step": 91930 - }, - { - "epoch": 0.8127795753107374, - "grad_norm": 1.3898875713348389, - "learning_rate": 3.645367374482104e-05, - "loss": 0.5682, - "step": 91940 - }, - { - "epoch": 0.8128679785710498, - "grad_norm": 1.76920485496521, - "learning_rate": 3.6452200357149175e-05, - "loss": 0.764, - "step": 91950 - }, - { - "epoch": 0.812956381831362, - "grad_norm": 3.05548357963562, - "learning_rate": 3.64507269694773e-05, - "loss": 0.6244, - "step": 91960 - }, - { - "epoch": 0.8130447850916742, - "grad_norm": 2.593017816543579, - "learning_rate": 3.644925358180543e-05, - "loss": 0.6237, - "step": 91970 - }, - { - "epoch": 0.8131331883519864, - "grad_norm": 5.923634052276611, - "learning_rate": 3.644778019413356e-05, - "loss": 0.5903, - "step": 91980 - }, - { - "epoch": 0.8132215916122987, - "grad_norm": 2.416029453277588, - "learning_rate": 3.644630680646169e-05, - "loss": 0.6887, - "step": 91990 - }, - { - "epoch": 0.8133099948726109, - "grad_norm": 5.269219398498535, - "learning_rate": 3.644483341878982e-05, - "loss": 0.5629, - "step": 92000 - }, - { - "epoch": 0.8133983981329231, - "grad_norm": 1.3841913938522339, - "learning_rate": 3.644336003111795e-05, - "loss": 0.7364, - "step": 92010 - }, - { - "epoch": 0.8134868013932354, - "grad_norm": 7.034547328948975, - "learning_rate": 3.6441886643446074e-05, - "loss": 0.661, - "step": 92020 - }, - { - "epoch": 0.8135752046535476, - "grad_norm": 2.8226442337036133, - "learning_rate": 3.644041325577421e-05, - "loss": 0.7605, - "step": 92030 - }, - { - "epoch": 0.8136636079138598, - "grad_norm": 3.047433376312256, - "learning_rate": 3.643893986810234e-05, - "loss": 0.6732, - "step": 92040 - }, - { - "epoch": 0.8137520111741721, - "grad_norm": 5.352085113525391, - "learning_rate": 3.6437466480430466e-05, - "loss": 0.666, - "step": 92050 - }, - { - "epoch": 0.8138404144344843, - "grad_norm": 7.8496246337890625, - "learning_rate": 3.6435993092758594e-05, - "loss": 0.6799, - "step": 92060 - }, - { - "epoch": 0.8139288176947966, - "grad_norm": 3.2211687564849854, - "learning_rate": 3.643451970508672e-05, - "loss": 0.6864, - "step": 92070 - }, - { - "epoch": 0.8140172209551089, - "grad_norm": 1.2963106632232666, - "learning_rate": 3.643304631741485e-05, - "loss": 0.5978, - "step": 92080 - }, - { - "epoch": 0.8141056242154211, - "grad_norm": 7.160528182983398, - "learning_rate": 3.6431572929742986e-05, - "loss": 0.6484, - "step": 92090 - }, - { - "epoch": 0.8141940274757333, - "grad_norm": 3.806544542312622, - "learning_rate": 3.6430099542071114e-05, - "loss": 0.6917, - "step": 92100 - }, - { - "epoch": 0.8142824307360456, - "grad_norm": 2.3122305870056152, - "learning_rate": 3.642862615439924e-05, - "loss": 0.8155, - "step": 92110 - }, - { - "epoch": 0.8143708339963578, - "grad_norm": 2.539961338043213, - "learning_rate": 3.642715276672737e-05, - "loss": 0.5733, - "step": 92120 - }, - { - "epoch": 0.81445923725667, - "grad_norm": 2.348661184310913, - "learning_rate": 3.64256793790555e-05, - "loss": 0.6232, - "step": 92130 - }, - { - "epoch": 0.8145476405169823, - "grad_norm": 8.354660987854004, - "learning_rate": 3.642420599138363e-05, - "loss": 0.6642, - "step": 92140 - }, - { - "epoch": 0.8146360437772945, - "grad_norm": 1.988718867301941, - "learning_rate": 3.642273260371176e-05, - "loss": 0.7333, - "step": 92150 - }, - { - "epoch": 0.8147244470376067, - "grad_norm": 1.8847707509994507, - "learning_rate": 3.642125921603989e-05, - "loss": 0.6676, - "step": 92160 - }, - { - "epoch": 0.814812850297919, - "grad_norm": 1.855980396270752, - "learning_rate": 3.641978582836802e-05, - "loss": 0.7347, - "step": 92170 - }, - { - "epoch": 0.8149012535582312, - "grad_norm": 3.3950014114379883, - "learning_rate": 3.641831244069615e-05, - "loss": 0.5434, - "step": 92180 - }, - { - "epoch": 0.8149896568185435, - "grad_norm": 1.589786410331726, - "learning_rate": 3.6416839053024276e-05, - "loss": 0.6124, - "step": 92190 - }, - { - "epoch": 0.8150780600788557, - "grad_norm": 2.3193206787109375, - "learning_rate": 3.6415365665352404e-05, - "loss": 0.7166, - "step": 92200 - }, - { - "epoch": 0.815166463339168, - "grad_norm": 2.0396432876586914, - "learning_rate": 3.641389227768053e-05, - "loss": 0.5533, - "step": 92210 - }, - { - "epoch": 0.8152548665994802, - "grad_norm": 2.424165964126587, - "learning_rate": 3.641241889000867e-05, - "loss": 0.6541, - "step": 92220 - }, - { - "epoch": 0.8153432698597924, - "grad_norm": 4.729544162750244, - "learning_rate": 3.6410945502336796e-05, - "loss": 0.6918, - "step": 92230 - }, - { - "epoch": 0.8154316731201047, - "grad_norm": 4.937289237976074, - "learning_rate": 3.6409472114664925e-05, - "loss": 0.6124, - "step": 92240 - }, - { - "epoch": 0.8155200763804169, - "grad_norm": 1.8067333698272705, - "learning_rate": 3.640799872699305e-05, - "loss": 0.595, - "step": 92250 - }, - { - "epoch": 0.8156084796407291, - "grad_norm": 8.4617919921875, - "learning_rate": 3.640652533932118e-05, - "loss": 0.6465, - "step": 92260 - }, - { - "epoch": 0.8156968829010414, - "grad_norm": 2.161268949508667, - "learning_rate": 3.640505195164931e-05, - "loss": 0.5909, - "step": 92270 - }, - { - "epoch": 0.8157852861613536, - "grad_norm": 12.53947925567627, - "learning_rate": 3.6403578563977445e-05, - "loss": 0.7782, - "step": 92280 - }, - { - "epoch": 0.8158736894216658, - "grad_norm": 4.181900501251221, - "learning_rate": 3.640210517630557e-05, - "loss": 0.6112, - "step": 92290 - }, - { - "epoch": 0.8159620926819781, - "grad_norm": 1.1209492683410645, - "learning_rate": 3.64006317886337e-05, - "loss": 0.673, - "step": 92300 - }, - { - "epoch": 0.8160504959422904, - "grad_norm": 3.288938045501709, - "learning_rate": 3.639915840096183e-05, - "loss": 0.6374, - "step": 92310 - }, - { - "epoch": 0.8161388992026026, - "grad_norm": 1.3260165452957153, - "learning_rate": 3.639768501328996e-05, - "loss": 0.5456, - "step": 92320 - }, - { - "epoch": 0.8162273024629149, - "grad_norm": 3.2640914916992188, - "learning_rate": 3.639621162561809e-05, - "loss": 0.6227, - "step": 92330 - }, - { - "epoch": 0.8163157057232271, - "grad_norm": 2.262810468673706, - "learning_rate": 3.639473823794622e-05, - "loss": 0.4703, - "step": 92340 - }, - { - "epoch": 0.8164041089835393, - "grad_norm": 2.8205833435058594, - "learning_rate": 3.639326485027434e-05, - "loss": 0.7569, - "step": 92350 - }, - { - "epoch": 0.8164925122438516, - "grad_norm": 11.378689765930176, - "learning_rate": 3.639179146260248e-05, - "loss": 0.693, - "step": 92360 - }, - { - "epoch": 0.8165809155041638, - "grad_norm": 2.483201503753662, - "learning_rate": 3.639031807493061e-05, - "loss": 0.7184, - "step": 92370 - }, - { - "epoch": 0.816669318764476, - "grad_norm": 5.942753314971924, - "learning_rate": 3.6388844687258735e-05, - "loss": 0.5987, - "step": 92380 - }, - { - "epoch": 0.8167577220247882, - "grad_norm": 1.5592632293701172, - "learning_rate": 3.6387371299586864e-05, - "loss": 0.7121, - "step": 92390 - }, - { - "epoch": 0.8168461252851005, - "grad_norm": 2.551483392715454, - "learning_rate": 3.6385897911915e-05, - "loss": 0.6107, - "step": 92400 - }, - { - "epoch": 0.8169345285454127, - "grad_norm": 6.301365852355957, - "learning_rate": 3.638442452424312e-05, - "loss": 0.5015, - "step": 92410 - }, - { - "epoch": 0.817022931805725, - "grad_norm": 2.5561041831970215, - "learning_rate": 3.6382951136571255e-05, - "loss": 0.7222, - "step": 92420 - }, - { - "epoch": 0.8171113350660373, - "grad_norm": 2.8549132347106934, - "learning_rate": 3.638147774889938e-05, - "loss": 0.712, - "step": 92430 - }, - { - "epoch": 0.8171997383263495, - "grad_norm": 3.7408576011657715, - "learning_rate": 3.638000436122751e-05, - "loss": 0.7478, - "step": 92440 - }, - { - "epoch": 0.8172881415866617, - "grad_norm": 6.571569442749023, - "learning_rate": 3.637853097355564e-05, - "loss": 0.6062, - "step": 92450 - }, - { - "epoch": 0.817376544846974, - "grad_norm": 2.056588649749756, - "learning_rate": 3.637705758588377e-05, - "loss": 0.679, - "step": 92460 - }, - { - "epoch": 0.8174649481072862, - "grad_norm": 2.586590528488159, - "learning_rate": 3.63755841982119e-05, - "loss": 0.5518, - "step": 92470 - }, - { - "epoch": 0.8175533513675984, - "grad_norm": 2.3542826175689697, - "learning_rate": 3.637411081054003e-05, - "loss": 0.6996, - "step": 92480 - }, - { - "epoch": 0.8176417546279107, - "grad_norm": 2.6961874961853027, - "learning_rate": 3.6372637422868154e-05, - "loss": 0.5685, - "step": 92490 - }, - { - "epoch": 0.8177301578882229, - "grad_norm": 2.5827276706695557, - "learning_rate": 3.637116403519629e-05, - "loss": 0.5815, - "step": 92500 - }, - { - "epoch": 0.8178185611485351, - "grad_norm": 4.169956207275391, - "learning_rate": 3.636969064752442e-05, - "loss": 0.6305, - "step": 92510 - }, - { - "epoch": 0.8179069644088474, - "grad_norm": 2.4388086795806885, - "learning_rate": 3.6368217259852546e-05, - "loss": 0.6421, - "step": 92520 - }, - { - "epoch": 0.8179953676691596, - "grad_norm": 12.235270500183105, - "learning_rate": 3.6366743872180674e-05, - "loss": 0.7019, - "step": 92530 - }, - { - "epoch": 0.8180837709294719, - "grad_norm": 1.524057388305664, - "learning_rate": 3.63652704845088e-05, - "loss": 0.5144, - "step": 92540 - }, - { - "epoch": 0.8181721741897842, - "grad_norm": 2.38299822807312, - "learning_rate": 3.636379709683693e-05, - "loss": 0.7373, - "step": 92550 - }, - { - "epoch": 0.8182605774500964, - "grad_norm": 2.7471745014190674, - "learning_rate": 3.6362323709165066e-05, - "loss": 0.6435, - "step": 92560 - }, - { - "epoch": 0.8183489807104086, - "grad_norm": 2.5418381690979004, - "learning_rate": 3.636085032149319e-05, - "loss": 0.7688, - "step": 92570 - }, - { - "epoch": 0.8184373839707209, - "grad_norm": 1.9382070302963257, - "learning_rate": 3.635937693382132e-05, - "loss": 0.7713, - "step": 92580 - }, - { - "epoch": 0.8185257872310331, - "grad_norm": 4.588695526123047, - "learning_rate": 3.635790354614945e-05, - "loss": 0.6895, - "step": 92590 - }, - { - "epoch": 0.8186141904913453, - "grad_norm": 1.5332796573638916, - "learning_rate": 3.635643015847758e-05, - "loss": 0.665, - "step": 92600 - }, - { - "epoch": 0.8187025937516575, - "grad_norm": 1.8502647876739502, - "learning_rate": 3.635495677080571e-05, - "loss": 0.6994, - "step": 92610 - }, - { - "epoch": 0.8187909970119698, - "grad_norm": 16.294696807861328, - "learning_rate": 3.635348338313384e-05, - "loss": 0.7789, - "step": 92620 - }, - { - "epoch": 0.818879400272282, - "grad_norm": 6.920746326446533, - "learning_rate": 3.6352009995461964e-05, - "loss": 0.6314, - "step": 92630 - }, - { - "epoch": 0.8189678035325942, - "grad_norm": 4.778506278991699, - "learning_rate": 3.63505366077901e-05, - "loss": 0.7118, - "step": 92640 - }, - { - "epoch": 0.8190562067929065, - "grad_norm": 2.9670848846435547, - "learning_rate": 3.634906322011823e-05, - "loss": 0.6154, - "step": 92650 - }, - { - "epoch": 0.8191446100532188, - "grad_norm": 4.475405216217041, - "learning_rate": 3.6347589832446356e-05, - "loss": 0.6261, - "step": 92660 - }, - { - "epoch": 0.819233013313531, - "grad_norm": 2.0727615356445312, - "learning_rate": 3.6346116444774485e-05, - "loss": 0.791, - "step": 92670 - }, - { - "epoch": 0.8193214165738433, - "grad_norm": 3.1234781742095947, - "learning_rate": 3.634464305710261e-05, - "loss": 0.6408, - "step": 92680 - }, - { - "epoch": 0.8194098198341555, - "grad_norm": 3.2888875007629395, - "learning_rate": 3.634316966943074e-05, - "loss": 0.6352, - "step": 92690 - }, - { - "epoch": 0.8194982230944677, - "grad_norm": 9.846280097961426, - "learning_rate": 3.6341696281758876e-05, - "loss": 0.6515, - "step": 92700 - }, - { - "epoch": 0.81958662635478, - "grad_norm": 2.0807485580444336, - "learning_rate": 3.6340222894087e-05, - "loss": 0.8486, - "step": 92710 - }, - { - "epoch": 0.8196750296150922, - "grad_norm": 2.234642744064331, - "learning_rate": 3.633874950641513e-05, - "loss": 0.5592, - "step": 92720 - }, - { - "epoch": 0.8197634328754044, - "grad_norm": 2.986532211303711, - "learning_rate": 3.633727611874326e-05, - "loss": 0.7216, - "step": 92730 - }, - { - "epoch": 0.8198518361357167, - "grad_norm": 4.456048011779785, - "learning_rate": 3.633580273107139e-05, - "loss": 0.749, - "step": 92740 - }, - { - "epoch": 0.8199402393960289, - "grad_norm": 1.583105206489563, - "learning_rate": 3.633432934339952e-05, - "loss": 0.479, - "step": 92750 - }, - { - "epoch": 0.8200286426563411, - "grad_norm": 7.858306884765625, - "learning_rate": 3.633285595572765e-05, - "loss": 0.6366, - "step": 92760 - }, - { - "epoch": 0.8201170459166534, - "grad_norm": 2.4444801807403564, - "learning_rate": 3.6331382568055775e-05, - "loss": 0.6335, - "step": 92770 - }, - { - "epoch": 0.8202054491769657, - "grad_norm": 3.6671173572540283, - "learning_rate": 3.632990918038391e-05, - "loss": 0.6705, - "step": 92780 - }, - { - "epoch": 0.8202938524372779, - "grad_norm": 7.266944408416748, - "learning_rate": 3.632843579271203e-05, - "loss": 0.5835, - "step": 92790 - }, - { - "epoch": 0.8203822556975902, - "grad_norm": 6.77391242980957, - "learning_rate": 3.632696240504017e-05, - "loss": 0.6936, - "step": 92800 - }, - { - "epoch": 0.8204706589579024, - "grad_norm": 1.7394851446151733, - "learning_rate": 3.6325489017368295e-05, - "loss": 0.6249, - "step": 92810 - }, - { - "epoch": 0.8205590622182146, - "grad_norm": 12.7716646194458, - "learning_rate": 3.6324015629696423e-05, - "loss": 0.7139, - "step": 92820 - }, - { - "epoch": 0.8206474654785268, - "grad_norm": 2.141550064086914, - "learning_rate": 3.632254224202455e-05, - "loss": 0.62, - "step": 92830 - }, - { - "epoch": 0.8207358687388391, - "grad_norm": 5.2641119956970215, - "learning_rate": 3.632106885435269e-05, - "loss": 0.6269, - "step": 92840 - }, - { - "epoch": 0.8208242719991513, - "grad_norm": 10.728096961975098, - "learning_rate": 3.631959546668081e-05, - "loss": 0.5302, - "step": 92850 - }, - { - "epoch": 0.8209126752594635, - "grad_norm": 2.6497015953063965, - "learning_rate": 3.6318122079008944e-05, - "loss": 0.6716, - "step": 92860 - }, - { - "epoch": 0.8210010785197758, - "grad_norm": 1.8583358526229858, - "learning_rate": 3.631664869133707e-05, - "loss": 0.6074, - "step": 92870 - }, - { - "epoch": 0.821089481780088, - "grad_norm": 6.227238178253174, - "learning_rate": 3.63151753036652e-05, - "loss": 0.5987, - "step": 92880 - }, - { - "epoch": 0.8211778850404002, - "grad_norm": 2.291771411895752, - "learning_rate": 3.631370191599333e-05, - "loss": 0.5297, - "step": 92890 - }, - { - "epoch": 0.8212662883007126, - "grad_norm": 7.058529376983643, - "learning_rate": 3.631222852832146e-05, - "loss": 0.7912, - "step": 92900 - }, - { - "epoch": 0.8213546915610248, - "grad_norm": 2.214385509490967, - "learning_rate": 3.6310755140649585e-05, - "loss": 0.7743, - "step": 92910 - }, - { - "epoch": 0.821443094821337, - "grad_norm": 2.1141932010650635, - "learning_rate": 3.630928175297772e-05, - "loss": 0.5796, - "step": 92920 - }, - { - "epoch": 0.8215314980816493, - "grad_norm": 1.197513461112976, - "learning_rate": 3.630780836530584e-05, - "loss": 0.6577, - "step": 92930 - }, - { - "epoch": 0.8216199013419615, - "grad_norm": 4.269301414489746, - "learning_rate": 3.630633497763398e-05, - "loss": 0.6044, - "step": 92940 - }, - { - "epoch": 0.8217083046022737, - "grad_norm": 8.293034553527832, - "learning_rate": 3.6304861589962106e-05, - "loss": 0.6815, - "step": 92950 - }, - { - "epoch": 0.821796707862586, - "grad_norm": 3.6531825065612793, - "learning_rate": 3.6303388202290234e-05, - "loss": 0.6008, - "step": 92960 - }, - { - "epoch": 0.8218851111228982, - "grad_norm": 2.503737211227417, - "learning_rate": 3.630191481461836e-05, - "loss": 0.8088, - "step": 92970 - }, - { - "epoch": 0.8219735143832104, - "grad_norm": 3.013113021850586, - "learning_rate": 3.63004414269465e-05, - "loss": 0.6954, - "step": 92980 - }, - { - "epoch": 0.8220619176435227, - "grad_norm": 4.123807907104492, - "learning_rate": 3.629896803927462e-05, - "loss": 0.5944, - "step": 92990 - }, - { - "epoch": 0.8221503209038349, - "grad_norm": 2.6282057762145996, - "learning_rate": 3.6297494651602754e-05, - "loss": 0.6253, - "step": 93000 - }, - { - "epoch": 0.8222387241641472, - "grad_norm": 1.1501773595809937, - "learning_rate": 3.629602126393088e-05, - "loss": 0.6613, - "step": 93010 - }, - { - "epoch": 0.8223271274244595, - "grad_norm": 1.26383376121521, - "learning_rate": 3.629454787625901e-05, - "loss": 0.6102, - "step": 93020 - }, - { - "epoch": 0.8224155306847717, - "grad_norm": 1.831240177154541, - "learning_rate": 3.629307448858714e-05, - "loss": 0.6655, - "step": 93030 - }, - { - "epoch": 0.8225039339450839, - "grad_norm": 12.614300727844238, - "learning_rate": 3.629160110091527e-05, - "loss": 0.6272, - "step": 93040 - }, - { - "epoch": 0.8225923372053962, - "grad_norm": 4.264084339141846, - "learning_rate": 3.6290127713243396e-05, - "loss": 0.7118, - "step": 93050 - }, - { - "epoch": 0.8226807404657084, - "grad_norm": 3.637566566467285, - "learning_rate": 3.628865432557153e-05, - "loss": 0.7523, - "step": 93060 - }, - { - "epoch": 0.8227691437260206, - "grad_norm": 1.5302402973175049, - "learning_rate": 3.628718093789966e-05, - "loss": 0.6187, - "step": 93070 - }, - { - "epoch": 0.8228575469863328, - "grad_norm": 2.6546175479888916, - "learning_rate": 3.628570755022779e-05, - "loss": 0.6413, - "step": 93080 - }, - { - "epoch": 0.8229459502466451, - "grad_norm": 6.245647430419922, - "learning_rate": 3.6284234162555916e-05, - "loss": 0.72, - "step": 93090 - }, - { - "epoch": 0.8230343535069573, - "grad_norm": 5.670719146728516, - "learning_rate": 3.6282760774884044e-05, - "loss": 0.6134, - "step": 93100 - }, - { - "epoch": 0.8231227567672695, - "grad_norm": 2.2253665924072266, - "learning_rate": 3.628128738721217e-05, - "loss": 0.6956, - "step": 93110 - }, - { - "epoch": 0.8232111600275818, - "grad_norm": 1.2625876665115356, - "learning_rate": 3.627981399954031e-05, - "loss": 0.7247, - "step": 93120 - }, - { - "epoch": 0.8232995632878941, - "grad_norm": 2.1546518802642822, - "learning_rate": 3.6278340611868436e-05, - "loss": 0.6002, - "step": 93130 - }, - { - "epoch": 0.8233879665482063, - "grad_norm": 3.581998348236084, - "learning_rate": 3.6276867224196565e-05, - "loss": 0.7919, - "step": 93140 - }, - { - "epoch": 0.8234763698085186, - "grad_norm": 2.450770378112793, - "learning_rate": 3.627539383652469e-05, - "loss": 0.6428, - "step": 93150 - }, - { - "epoch": 0.8235647730688308, - "grad_norm": 0.8883151412010193, - "learning_rate": 3.627392044885282e-05, - "loss": 0.6831, - "step": 93160 - }, - { - "epoch": 0.823653176329143, - "grad_norm": 2.1562340259552, - "learning_rate": 3.627244706118095e-05, - "loss": 0.7156, - "step": 93170 - }, - { - "epoch": 0.8237415795894553, - "grad_norm": 2.547234058380127, - "learning_rate": 3.627097367350908e-05, - "loss": 0.6353, - "step": 93180 - }, - { - "epoch": 0.8238299828497675, - "grad_norm": 3.1326427459716797, - "learning_rate": 3.626950028583721e-05, - "loss": 0.6836, - "step": 93190 - }, - { - "epoch": 0.8239183861100797, - "grad_norm": 1.0381940603256226, - "learning_rate": 3.626802689816534e-05, - "loss": 0.6069, - "step": 93200 - }, - { - "epoch": 0.824006789370392, - "grad_norm": 3.0886294841766357, - "learning_rate": 3.626655351049347e-05, - "loss": 0.6478, - "step": 93210 - }, - { - "epoch": 0.8240951926307042, - "grad_norm": 3.674008369445801, - "learning_rate": 3.62650801228216e-05, - "loss": 0.6818, - "step": 93220 - }, - { - "epoch": 0.8241835958910164, - "grad_norm": 4.91383171081543, - "learning_rate": 3.6263606735149727e-05, - "loss": 0.7287, - "step": 93230 - }, - { - "epoch": 0.8242719991513286, - "grad_norm": 4.42464542388916, - "learning_rate": 3.6262133347477855e-05, - "loss": 0.6606, - "step": 93240 - }, - { - "epoch": 0.824360402411641, - "grad_norm": 1.6349812746047974, - "learning_rate": 3.626065995980599e-05, - "loss": 0.5162, - "step": 93250 - }, - { - "epoch": 0.8244488056719532, - "grad_norm": 4.765417098999023, - "learning_rate": 3.625918657213411e-05, - "loss": 0.72, - "step": 93260 - }, - { - "epoch": 0.8245372089322655, - "grad_norm": 2.7557642459869385, - "learning_rate": 3.625771318446225e-05, - "loss": 0.6219, - "step": 93270 - }, - { - "epoch": 0.8246256121925777, - "grad_norm": 4.133311748504639, - "learning_rate": 3.6256239796790375e-05, - "loss": 0.6556, - "step": 93280 - }, - { - "epoch": 0.8247140154528899, - "grad_norm": 5.949282646179199, - "learning_rate": 3.6254766409118503e-05, - "loss": 0.5986, - "step": 93290 - }, - { - "epoch": 0.8248024187132021, - "grad_norm": 2.9737560749053955, - "learning_rate": 3.625329302144663e-05, - "loss": 0.7077, - "step": 93300 - }, - { - "epoch": 0.8248908219735144, - "grad_norm": 3.029956102371216, - "learning_rate": 3.625181963377477e-05, - "loss": 0.6805, - "step": 93310 - }, - { - "epoch": 0.8249792252338266, - "grad_norm": 3.5298495292663574, - "learning_rate": 3.625034624610289e-05, - "loss": 0.6401, - "step": 93320 - }, - { - "epoch": 0.8250676284941388, - "grad_norm": 11.33055305480957, - "learning_rate": 3.6248872858431024e-05, - "loss": 0.6757, - "step": 93330 - }, - { - "epoch": 0.8251560317544511, - "grad_norm": 2.6825695037841797, - "learning_rate": 3.624739947075915e-05, - "loss": 0.6602, - "step": 93340 - }, - { - "epoch": 0.8252444350147633, - "grad_norm": 5.537924289703369, - "learning_rate": 3.624592608308728e-05, - "loss": 0.6691, - "step": 93350 - }, - { - "epoch": 0.8253328382750755, - "grad_norm": 1.3969522714614868, - "learning_rate": 3.624445269541541e-05, - "loss": 0.6283, - "step": 93360 - }, - { - "epoch": 0.8254212415353879, - "grad_norm": 3.790619134902954, - "learning_rate": 3.624297930774354e-05, - "loss": 0.6556, - "step": 93370 - }, - { - "epoch": 0.8255096447957001, - "grad_norm": 1.347337245941162, - "learning_rate": 3.6241505920071665e-05, - "loss": 0.5757, - "step": 93380 - }, - { - "epoch": 0.8255980480560123, - "grad_norm": 1.6619949340820312, - "learning_rate": 3.62400325323998e-05, - "loss": 0.5592, - "step": 93390 - }, - { - "epoch": 0.8256864513163246, - "grad_norm": 9.179929733276367, - "learning_rate": 3.623855914472792e-05, - "loss": 0.6658, - "step": 93400 - }, - { - "epoch": 0.8257748545766368, - "grad_norm": 3.4941885471343994, - "learning_rate": 3.623708575705606e-05, - "loss": 0.6295, - "step": 93410 - }, - { - "epoch": 0.825863257836949, - "grad_norm": 7.0606207847595215, - "learning_rate": 3.6235612369384186e-05, - "loss": 0.5741, - "step": 93420 - }, - { - "epoch": 0.8259516610972613, - "grad_norm": 6.441812992095947, - "learning_rate": 3.6234138981712314e-05, - "loss": 0.7791, - "step": 93430 - }, - { - "epoch": 0.8260400643575735, - "grad_norm": 4.038876533508301, - "learning_rate": 3.623266559404044e-05, - "loss": 0.6294, - "step": 93440 - }, - { - "epoch": 0.8261284676178857, - "grad_norm": 1.269675850868225, - "learning_rate": 3.623119220636858e-05, - "loss": 0.6209, - "step": 93450 - }, - { - "epoch": 0.826216870878198, - "grad_norm": 6.175108432769775, - "learning_rate": 3.62297188186967e-05, - "loss": 0.6997, - "step": 93460 - }, - { - "epoch": 0.8263052741385102, - "grad_norm": 6.478979110717773, - "learning_rate": 3.6228245431024834e-05, - "loss": 0.7994, - "step": 93470 - }, - { - "epoch": 0.8263936773988225, - "grad_norm": 7.0543012619018555, - "learning_rate": 3.6226772043352956e-05, - "loss": 0.7055, - "step": 93480 - }, - { - "epoch": 0.8264820806591348, - "grad_norm": 2.9575681686401367, - "learning_rate": 3.622529865568109e-05, - "loss": 0.6662, - "step": 93490 - }, - { - "epoch": 0.826570483919447, - "grad_norm": 1.9254590272903442, - "learning_rate": 3.622382526800922e-05, - "loss": 0.561, - "step": 93500 - }, - { - "epoch": 0.8266588871797592, - "grad_norm": 1.9862453937530518, - "learning_rate": 3.622235188033735e-05, - "loss": 0.7086, - "step": 93510 - }, - { - "epoch": 0.8267472904400714, - "grad_norm": 2.7529067993164062, - "learning_rate": 3.6220878492665476e-05, - "loss": 0.5647, - "step": 93520 - }, - { - "epoch": 0.8268356937003837, - "grad_norm": 1.2451297044754028, - "learning_rate": 3.621940510499361e-05, - "loss": 0.6209, - "step": 93530 - }, - { - "epoch": 0.8269240969606959, - "grad_norm": 5.997985363006592, - "learning_rate": 3.621793171732173e-05, - "loss": 0.6259, - "step": 93540 - }, - { - "epoch": 0.8270125002210081, - "grad_norm": 0.9683815836906433, - "learning_rate": 3.621645832964987e-05, - "loss": 0.5411, - "step": 93550 - }, - { - "epoch": 0.8271009034813204, - "grad_norm": 2.685631275177002, - "learning_rate": 3.6214984941977996e-05, - "loss": 0.6966, - "step": 93560 - }, - { - "epoch": 0.8271893067416326, - "grad_norm": 3.4126617908477783, - "learning_rate": 3.6213511554306124e-05, - "loss": 0.775, - "step": 93570 - }, - { - "epoch": 0.8272777100019448, - "grad_norm": 1.7319691181182861, - "learning_rate": 3.621203816663425e-05, - "loss": 0.6634, - "step": 93580 - }, - { - "epoch": 0.8273661132622571, - "grad_norm": 1.2252484560012817, - "learning_rate": 3.621056477896239e-05, - "loss": 0.6667, - "step": 93590 - }, - { - "epoch": 0.8274545165225694, - "grad_norm": 6.5128679275512695, - "learning_rate": 3.620909139129051e-05, - "loss": 0.6113, - "step": 93600 - }, - { - "epoch": 0.8275429197828816, - "grad_norm": 8.293207168579102, - "learning_rate": 3.6207618003618645e-05, - "loss": 0.6084, - "step": 93610 - }, - { - "epoch": 0.8276313230431939, - "grad_norm": 11.024662017822266, - "learning_rate": 3.6206144615946766e-05, - "loss": 0.7372, - "step": 93620 - }, - { - "epoch": 0.8277197263035061, - "grad_norm": 2.8585751056671143, - "learning_rate": 3.62046712282749e-05, - "loss": 0.5327, - "step": 93630 - }, - { - "epoch": 0.8278081295638183, - "grad_norm": 9.185791969299316, - "learning_rate": 3.620319784060303e-05, - "loss": 0.6877, - "step": 93640 - }, - { - "epoch": 0.8278965328241306, - "grad_norm": 1.2710436582565308, - "learning_rate": 3.620172445293116e-05, - "loss": 0.6548, - "step": 93650 - }, - { - "epoch": 0.8279849360844428, - "grad_norm": 2.4045794010162354, - "learning_rate": 3.6200251065259286e-05, - "loss": 0.7098, - "step": 93660 - }, - { - "epoch": 0.828073339344755, - "grad_norm": 6.192056655883789, - "learning_rate": 3.619877767758742e-05, - "loss": 0.7721, - "step": 93670 - }, - { - "epoch": 0.8281617426050673, - "grad_norm": 4.213747024536133, - "learning_rate": 3.619730428991554e-05, - "loss": 0.6803, - "step": 93680 - }, - { - "epoch": 0.8282501458653795, - "grad_norm": 3.9271175861358643, - "learning_rate": 3.619583090224368e-05, - "loss": 0.5977, - "step": 93690 - }, - { - "epoch": 0.8283385491256917, - "grad_norm": 3.1815481185913086, - "learning_rate": 3.619435751457181e-05, - "loss": 0.642, - "step": 93700 - }, - { - "epoch": 0.828426952386004, - "grad_norm": 8.14946174621582, - "learning_rate": 3.6192884126899935e-05, - "loss": 0.7146, - "step": 93710 - }, - { - "epoch": 0.8285153556463163, - "grad_norm": 3.190246343612671, - "learning_rate": 3.619141073922806e-05, - "loss": 0.5952, - "step": 93720 - }, - { - "epoch": 0.8286037589066285, - "grad_norm": 4.229310989379883, - "learning_rate": 3.618993735155619e-05, - "loss": 0.6867, - "step": 93730 - }, - { - "epoch": 0.8286921621669407, - "grad_norm": 1.8603646755218506, - "learning_rate": 3.618846396388432e-05, - "loss": 0.7172, - "step": 93740 - }, - { - "epoch": 0.828780565427253, - "grad_norm": 2.589078187942505, - "learning_rate": 3.6186990576212455e-05, - "loss": 0.6561, - "step": 93750 - }, - { - "epoch": 0.8288689686875652, - "grad_norm": 6.150266647338867, - "learning_rate": 3.618551718854058e-05, - "loss": 0.7016, - "step": 93760 - }, - { - "epoch": 0.8289573719478774, - "grad_norm": 1.3360360860824585, - "learning_rate": 3.618404380086871e-05, - "loss": 0.6646, - "step": 93770 - }, - { - "epoch": 0.8290457752081897, - "grad_norm": 4.242101192474365, - "learning_rate": 3.618257041319684e-05, - "loss": 0.615, - "step": 93780 - }, - { - "epoch": 0.8291341784685019, - "grad_norm": 11.25772476196289, - "learning_rate": 3.618109702552497e-05, - "loss": 0.6885, - "step": 93790 - }, - { - "epoch": 0.8292225817288141, - "grad_norm": 2.015272378921509, - "learning_rate": 3.61796236378531e-05, - "loss": 0.5991, - "step": 93800 - }, - { - "epoch": 0.8293109849891264, - "grad_norm": 1.615455985069275, - "learning_rate": 3.617815025018123e-05, - "loss": 0.5903, - "step": 93810 - }, - { - "epoch": 0.8293993882494386, - "grad_norm": 1.867724061012268, - "learning_rate": 3.6176676862509354e-05, - "loss": 0.5391, - "step": 93820 - }, - { - "epoch": 0.8294877915097508, - "grad_norm": 1.86784029006958, - "learning_rate": 3.617520347483749e-05, - "loss": 0.6771, - "step": 93830 - }, - { - "epoch": 0.8295761947700632, - "grad_norm": 5.769179821014404, - "learning_rate": 3.617373008716561e-05, - "loss": 0.6704, - "step": 93840 - }, - { - "epoch": 0.8296645980303754, - "grad_norm": 3.508192300796509, - "learning_rate": 3.6172256699493745e-05, - "loss": 0.7017, - "step": 93850 - }, - { - "epoch": 0.8297530012906876, - "grad_norm": 0.9482429623603821, - "learning_rate": 3.6170783311821874e-05, - "loss": 0.6837, - "step": 93860 - }, - { - "epoch": 0.8298414045509999, - "grad_norm": 3.013530731201172, - "learning_rate": 3.616930992415e-05, - "loss": 0.6996, - "step": 93870 - }, - { - "epoch": 0.8299298078113121, - "grad_norm": 1.6802130937576294, - "learning_rate": 3.616783653647813e-05, - "loss": 0.6276, - "step": 93880 - }, - { - "epoch": 0.8300182110716243, - "grad_norm": 2.8071370124816895, - "learning_rate": 3.6166363148806266e-05, - "loss": 0.7872, - "step": 93890 - }, - { - "epoch": 0.8301066143319366, - "grad_norm": 2.110656976699829, - "learning_rate": 3.616488976113439e-05, - "loss": 0.6538, - "step": 93900 - }, - { - "epoch": 0.8301950175922488, - "grad_norm": 3.661648750305176, - "learning_rate": 3.616341637346252e-05, - "loss": 0.6225, - "step": 93910 - }, - { - "epoch": 0.830283420852561, - "grad_norm": 5.264840126037598, - "learning_rate": 3.616194298579065e-05, - "loss": 0.7919, - "step": 93920 - }, - { - "epoch": 0.8303718241128732, - "grad_norm": 2.264892101287842, - "learning_rate": 3.616046959811878e-05, - "loss": 0.5866, - "step": 93930 - }, - { - "epoch": 0.8304602273731855, - "grad_norm": 4.5659708976745605, - "learning_rate": 3.615899621044691e-05, - "loss": 0.5614, - "step": 93940 - }, - { - "epoch": 0.8305486306334977, - "grad_norm": 1.5349199771881104, - "learning_rate": 3.615752282277504e-05, - "loss": 0.5593, - "step": 93950 - }, - { - "epoch": 0.83063703389381, - "grad_norm": 6.145536422729492, - "learning_rate": 3.615604943510317e-05, - "loss": 0.8235, - "step": 93960 - }, - { - "epoch": 0.8307254371541223, - "grad_norm": 4.073675155639648, - "learning_rate": 3.61545760474313e-05, - "loss": 0.5412, - "step": 93970 - }, - { - "epoch": 0.8308138404144345, - "grad_norm": 2.0879557132720947, - "learning_rate": 3.615310265975943e-05, - "loss": 0.5803, - "step": 93980 - }, - { - "epoch": 0.8309022436747467, - "grad_norm": 1.188655972480774, - "learning_rate": 3.6151629272087556e-05, - "loss": 0.6371, - "step": 93990 - }, - { - "epoch": 0.830990646935059, - "grad_norm": 4.851926326751709, - "learning_rate": 3.6150155884415684e-05, - "loss": 0.4916, - "step": 94000 - }, - { - "epoch": 0.8310790501953712, - "grad_norm": 1.4034401178359985, - "learning_rate": 3.614868249674381e-05, - "loss": 0.6945, - "step": 94010 - }, - { - "epoch": 0.8311674534556834, - "grad_norm": 3.4772355556488037, - "learning_rate": 3.614720910907195e-05, - "loss": 0.6497, - "step": 94020 - }, - { - "epoch": 0.8312558567159957, - "grad_norm": 4.7165374755859375, - "learning_rate": 3.6145735721400076e-05, - "loss": 0.7531, - "step": 94030 - }, - { - "epoch": 0.8313442599763079, - "grad_norm": 3.0375566482543945, - "learning_rate": 3.6144262333728205e-05, - "loss": 0.6571, - "step": 94040 - }, - { - "epoch": 0.8314326632366201, - "grad_norm": 1.5466070175170898, - "learning_rate": 3.614278894605633e-05, - "loss": 0.6361, - "step": 94050 - }, - { - "epoch": 0.8315210664969324, - "grad_norm": 5.721677780151367, - "learning_rate": 3.614131555838446e-05, - "loss": 0.6532, - "step": 94060 - }, - { - "epoch": 0.8316094697572447, - "grad_norm": 1.3382488489151, - "learning_rate": 3.613984217071259e-05, - "loss": 0.6434, - "step": 94070 - }, - { - "epoch": 0.8316978730175569, - "grad_norm": 3.216163158416748, - "learning_rate": 3.6138368783040725e-05, - "loss": 0.8104, - "step": 94080 - }, - { - "epoch": 0.8317862762778692, - "grad_norm": 4.24934720993042, - "learning_rate": 3.6136895395368846e-05, - "loss": 0.6685, - "step": 94090 - }, - { - "epoch": 0.8318746795381814, - "grad_norm": 4.726006507873535, - "learning_rate": 3.613542200769698e-05, - "loss": 0.7063, - "step": 94100 - }, - { - "epoch": 0.8319630827984936, - "grad_norm": 5.045265197753906, - "learning_rate": 3.613394862002511e-05, - "loss": 0.6767, - "step": 94110 - }, - { - "epoch": 0.8320514860588059, - "grad_norm": 3.512802839279175, - "learning_rate": 3.613247523235324e-05, - "loss": 0.7515, - "step": 94120 - }, - { - "epoch": 0.8321398893191181, - "grad_norm": 3.4347739219665527, - "learning_rate": 3.6131001844681367e-05, - "loss": 0.5649, - "step": 94130 - }, - { - "epoch": 0.8322282925794303, - "grad_norm": 8.086050033569336, - "learning_rate": 3.61295284570095e-05, - "loss": 0.5289, - "step": 94140 - }, - { - "epoch": 0.8323166958397425, - "grad_norm": 3.934406042098999, - "learning_rate": 3.612805506933762e-05, - "loss": 0.6853, - "step": 94150 - }, - { - "epoch": 0.8324050991000548, - "grad_norm": 1.3040629625320435, - "learning_rate": 3.612658168166576e-05, - "loss": 0.7017, - "step": 94160 - }, - { - "epoch": 0.832493502360367, - "grad_norm": 3.054164409637451, - "learning_rate": 3.612510829399389e-05, - "loss": 0.6139, - "step": 94170 - }, - { - "epoch": 0.8325819056206792, - "grad_norm": 6.750848293304443, - "learning_rate": 3.6123634906322015e-05, - "loss": 0.6179, - "step": 94180 - }, - { - "epoch": 0.8326703088809916, - "grad_norm": 5.214075088500977, - "learning_rate": 3.6122161518650143e-05, - "loss": 0.6383, - "step": 94190 - }, - { - "epoch": 0.8327587121413038, - "grad_norm": 5.887363910675049, - "learning_rate": 3.612068813097827e-05, - "loss": 0.5707, - "step": 94200 - }, - { - "epoch": 0.832847115401616, - "grad_norm": 2.3657591342926025, - "learning_rate": 3.61192147433064e-05, - "loss": 0.5935, - "step": 94210 - }, - { - "epoch": 0.8329355186619283, - "grad_norm": 4.025388717651367, - "learning_rate": 3.6117741355634535e-05, - "loss": 0.6411, - "step": 94220 - }, - { - "epoch": 0.8330239219222405, - "grad_norm": 5.579286098480225, - "learning_rate": 3.611626796796266e-05, - "loss": 0.7095, - "step": 94230 - }, - { - "epoch": 0.8331123251825527, - "grad_norm": 2.4436745643615723, - "learning_rate": 3.611479458029079e-05, - "loss": 0.6464, - "step": 94240 - }, - { - "epoch": 0.833200728442865, - "grad_norm": 3.3473572731018066, - "learning_rate": 3.611332119261892e-05, - "loss": 0.6948, - "step": 94250 - }, - { - "epoch": 0.8332891317031772, - "grad_norm": 4.559767246246338, - "learning_rate": 3.611184780494705e-05, - "loss": 0.5037, - "step": 94260 - }, - { - "epoch": 0.8333775349634894, - "grad_norm": 2.4051787853240967, - "learning_rate": 3.611037441727518e-05, - "loss": 0.6604, - "step": 94270 - }, - { - "epoch": 0.8334659382238017, - "grad_norm": 1.8673170804977417, - "learning_rate": 3.610890102960331e-05, - "loss": 0.6683, - "step": 94280 - }, - { - "epoch": 0.8335543414841139, - "grad_norm": 2.71720290184021, - "learning_rate": 3.6107427641931434e-05, - "loss": 0.7054, - "step": 94290 - }, - { - "epoch": 0.8336427447444261, - "grad_norm": 4.1643571853637695, - "learning_rate": 3.610595425425957e-05, - "loss": 0.6801, - "step": 94300 - }, - { - "epoch": 0.8337311480047385, - "grad_norm": 2.6858880519866943, - "learning_rate": 3.610448086658769e-05, - "loss": 0.6288, - "step": 94310 - }, - { - "epoch": 0.8338195512650507, - "grad_norm": 2.0185506343841553, - "learning_rate": 3.6103007478915826e-05, - "loss": 0.6989, - "step": 94320 - }, - { - "epoch": 0.8339079545253629, - "grad_norm": 3.8516831398010254, - "learning_rate": 3.6101534091243954e-05, - "loss": 0.5838, - "step": 94330 - }, - { - "epoch": 0.8339963577856752, - "grad_norm": 8.46336841583252, - "learning_rate": 3.610006070357208e-05, - "loss": 0.8762, - "step": 94340 - }, - { - "epoch": 0.8340847610459874, - "grad_norm": 3.932737350463867, - "learning_rate": 3.609858731590021e-05, - "loss": 0.6922, - "step": 94350 - }, - { - "epoch": 0.8341731643062996, - "grad_norm": 13.593315124511719, - "learning_rate": 3.6097113928228346e-05, - "loss": 0.6391, - "step": 94360 - }, - { - "epoch": 0.8342615675666119, - "grad_norm": 2.8367323875427246, - "learning_rate": 3.609564054055647e-05, - "loss": 0.5589, - "step": 94370 - }, - { - "epoch": 0.8343499708269241, - "grad_norm": 2.1782689094543457, - "learning_rate": 3.60941671528846e-05, - "loss": 0.558, - "step": 94380 - }, - { - "epoch": 0.8344383740872363, - "grad_norm": 2.604386568069458, - "learning_rate": 3.609269376521273e-05, - "loss": 0.6621, - "step": 94390 - }, - { - "epoch": 0.8345267773475485, - "grad_norm": 2.1232829093933105, - "learning_rate": 3.609122037754086e-05, - "loss": 0.6319, - "step": 94400 - }, - { - "epoch": 0.8346151806078608, - "grad_norm": 4.2524919509887695, - "learning_rate": 3.608974698986899e-05, - "loss": 0.7322, - "step": 94410 - }, - { - "epoch": 0.834703583868173, - "grad_norm": 2.8115828037261963, - "learning_rate": 3.608827360219712e-05, - "loss": 0.7946, - "step": 94420 - }, - { - "epoch": 0.8347919871284853, - "grad_norm": 6.732259750366211, - "learning_rate": 3.6086800214525244e-05, - "loss": 0.6632, - "step": 94430 - }, - { - "epoch": 0.8348803903887976, - "grad_norm": 2.397207736968994, - "learning_rate": 3.608532682685338e-05, - "loss": 0.5989, - "step": 94440 - }, - { - "epoch": 0.8349687936491098, - "grad_norm": 4.116695880889893, - "learning_rate": 3.60838534391815e-05, - "loss": 0.6338, - "step": 94450 - }, - { - "epoch": 0.835057196909422, - "grad_norm": 1.6942788362503052, - "learning_rate": 3.6082380051509636e-05, - "loss": 0.7084, - "step": 94460 - }, - { - "epoch": 0.8351456001697343, - "grad_norm": 7.596015930175781, - "learning_rate": 3.6080906663837764e-05, - "loss": 0.6815, - "step": 94470 - }, - { - "epoch": 0.8352340034300465, - "grad_norm": 4.143629550933838, - "learning_rate": 3.607943327616589e-05, - "loss": 0.672, - "step": 94480 - }, - { - "epoch": 0.8353224066903587, - "grad_norm": 4.248392105102539, - "learning_rate": 3.607795988849402e-05, - "loss": 0.6851, - "step": 94490 - }, - { - "epoch": 0.835410809950671, - "grad_norm": 1.733974814414978, - "learning_rate": 3.6076486500822156e-05, - "loss": 0.6572, - "step": 94500 - }, - { - "epoch": 0.8354992132109832, - "grad_norm": 4.106632232666016, - "learning_rate": 3.607501311315028e-05, - "loss": 0.5702, - "step": 94510 - }, - { - "epoch": 0.8355876164712954, - "grad_norm": 4.915042400360107, - "learning_rate": 3.607353972547841e-05, - "loss": 0.6478, - "step": 94520 - }, - { - "epoch": 0.8356760197316077, - "grad_norm": 6.578536510467529, - "learning_rate": 3.607206633780654e-05, - "loss": 0.7171, - "step": 94530 - }, - { - "epoch": 0.83576442299192, - "grad_norm": 1.7583603858947754, - "learning_rate": 3.607059295013467e-05, - "loss": 0.6362, - "step": 94540 - }, - { - "epoch": 0.8358528262522322, - "grad_norm": 3.7528419494628906, - "learning_rate": 3.60691195624628e-05, - "loss": 0.6052, - "step": 94550 - }, - { - "epoch": 0.8359412295125445, - "grad_norm": 12.52851676940918, - "learning_rate": 3.6067646174790926e-05, - "loss": 0.4435, - "step": 94560 - }, - { - "epoch": 0.8360296327728567, - "grad_norm": 2.9721691608428955, - "learning_rate": 3.6066172787119055e-05, - "loss": 0.652, - "step": 94570 - }, - { - "epoch": 0.8361180360331689, - "grad_norm": 8.937162399291992, - "learning_rate": 3.606469939944719e-05, - "loss": 0.5816, - "step": 94580 - }, - { - "epoch": 0.8362064392934812, - "grad_norm": 7.869368553161621, - "learning_rate": 3.606322601177531e-05, - "loss": 0.6281, - "step": 94590 - }, - { - "epoch": 0.8362948425537934, - "grad_norm": 14.269586563110352, - "learning_rate": 3.6061752624103447e-05, - "loss": 0.7967, - "step": 94600 - }, - { - "epoch": 0.8363832458141056, - "grad_norm": 6.290854454040527, - "learning_rate": 3.6060279236431575e-05, - "loss": 0.7268, - "step": 94610 - }, - { - "epoch": 0.8364716490744178, - "grad_norm": 1.0632624626159668, - "learning_rate": 3.60588058487597e-05, - "loss": 0.5505, - "step": 94620 - }, - { - "epoch": 0.8365600523347301, - "grad_norm": 1.3970060348510742, - "learning_rate": 3.605733246108783e-05, - "loss": 0.5961, - "step": 94630 - }, - { - "epoch": 0.8366484555950423, - "grad_norm": 5.421457290649414, - "learning_rate": 3.605585907341597e-05, - "loss": 0.6474, - "step": 94640 - }, - { - "epoch": 0.8367368588553545, - "grad_norm": 4.138095378875732, - "learning_rate": 3.605438568574409e-05, - "loss": 0.6886, - "step": 94650 - }, - { - "epoch": 0.8368252621156669, - "grad_norm": 3.2270023822784424, - "learning_rate": 3.6052912298072223e-05, - "loss": 0.7935, - "step": 94660 - }, - { - "epoch": 0.8369136653759791, - "grad_norm": 3.0651822090148926, - "learning_rate": 3.6051438910400345e-05, - "loss": 0.6458, - "step": 94670 - }, - { - "epoch": 0.8370020686362913, - "grad_norm": 1.2351793050765991, - "learning_rate": 3.604996552272848e-05, - "loss": 0.5934, - "step": 94680 - }, - { - "epoch": 0.8370904718966036, - "grad_norm": 2.908625602722168, - "learning_rate": 3.604849213505661e-05, - "loss": 0.6298, - "step": 94690 - }, - { - "epoch": 0.8371788751569158, - "grad_norm": 2.9542267322540283, - "learning_rate": 3.604701874738474e-05, - "loss": 0.6277, - "step": 94700 - }, - { - "epoch": 0.837267278417228, - "grad_norm": 0.9569472670555115, - "learning_rate": 3.6045545359712865e-05, - "loss": 0.6418, - "step": 94710 - }, - { - "epoch": 0.8373556816775403, - "grad_norm": 2.43017578125, - "learning_rate": 3.6044071972041e-05, - "loss": 0.7019, - "step": 94720 - }, - { - "epoch": 0.8374440849378525, - "grad_norm": 3.396594524383545, - "learning_rate": 3.604259858436912e-05, - "loss": 0.5352, - "step": 94730 - }, - { - "epoch": 0.8375324881981647, - "grad_norm": 5.04083251953125, - "learning_rate": 3.604112519669726e-05, - "loss": 0.618, - "step": 94740 - }, - { - "epoch": 0.837620891458477, - "grad_norm": 8.53906536102295, - "learning_rate": 3.6039651809025385e-05, - "loss": 0.7045, - "step": 94750 - }, - { - "epoch": 0.8377092947187892, - "grad_norm": 1.6326218843460083, - "learning_rate": 3.6038178421353514e-05, - "loss": 0.5973, - "step": 94760 - }, - { - "epoch": 0.8377976979791014, - "grad_norm": 3.714289665222168, - "learning_rate": 3.603670503368164e-05, - "loss": 0.697, - "step": 94770 - }, - { - "epoch": 0.8378861012394138, - "grad_norm": 8.500152587890625, - "learning_rate": 3.603523164600977e-05, - "loss": 0.628, - "step": 94780 - }, - { - "epoch": 0.837974504499726, - "grad_norm": 4.840029239654541, - "learning_rate": 3.60337582583379e-05, - "loss": 0.7251, - "step": 94790 - }, - { - "epoch": 0.8380629077600382, - "grad_norm": 2.8530380725860596, - "learning_rate": 3.6032284870666034e-05, - "loss": 0.7012, - "step": 94800 - }, - { - "epoch": 0.8381513110203505, - "grad_norm": 23.596668243408203, - "learning_rate": 3.603081148299416e-05, - "loss": 0.6092, - "step": 94810 - }, - { - "epoch": 0.8382397142806627, - "grad_norm": 15.336099624633789, - "learning_rate": 3.602933809532229e-05, - "loss": 0.8211, - "step": 94820 - }, - { - "epoch": 0.8383281175409749, - "grad_norm": 2.4705581665039062, - "learning_rate": 3.602786470765042e-05, - "loss": 0.6362, - "step": 94830 - }, - { - "epoch": 0.8384165208012871, - "grad_norm": 8.284422874450684, - "learning_rate": 3.602639131997855e-05, - "loss": 0.636, - "step": 94840 - }, - { - "epoch": 0.8385049240615994, - "grad_norm": 1.1233346462249756, - "learning_rate": 3.6024917932306676e-05, - "loss": 0.7047, - "step": 94850 - }, - { - "epoch": 0.8385933273219116, - "grad_norm": 2.7768990993499756, - "learning_rate": 3.602344454463481e-05, - "loss": 0.5937, - "step": 94860 - }, - { - "epoch": 0.8386817305822238, - "grad_norm": 4.231262683868408, - "learning_rate": 3.602197115696294e-05, - "loss": 0.7304, - "step": 94870 - }, - { - "epoch": 0.8387701338425361, - "grad_norm": 3.647117853164673, - "learning_rate": 3.602049776929107e-05, - "loss": 0.7706, - "step": 94880 - }, - { - "epoch": 0.8388585371028483, - "grad_norm": 3.821812152862549, - "learning_rate": 3.6019024381619196e-05, - "loss": 0.61, - "step": 94890 - }, - { - "epoch": 0.8389469403631606, - "grad_norm": 3.63480806350708, - "learning_rate": 3.6017550993947324e-05, - "loss": 0.7079, - "step": 94900 - }, - { - "epoch": 0.8390353436234729, - "grad_norm": 3.7317652702331543, - "learning_rate": 3.601607760627545e-05, - "loss": 0.7602, - "step": 94910 - }, - { - "epoch": 0.8391237468837851, - "grad_norm": 7.364920139312744, - "learning_rate": 3.601460421860358e-05, - "loss": 0.7488, - "step": 94920 - }, - { - "epoch": 0.8392121501440973, - "grad_norm": 23.182640075683594, - "learning_rate": 3.6013130830931716e-05, - "loss": 0.6611, - "step": 94930 - }, - { - "epoch": 0.8393005534044096, - "grad_norm": 4.278759479522705, - "learning_rate": 3.6011657443259844e-05, - "loss": 0.6502, - "step": 94940 - }, - { - "epoch": 0.8393889566647218, - "grad_norm": 1.0418457984924316, - "learning_rate": 3.601018405558797e-05, - "loss": 0.6761, - "step": 94950 - }, - { - "epoch": 0.839477359925034, - "grad_norm": 2.0343780517578125, - "learning_rate": 3.60087106679161e-05, - "loss": 0.5939, - "step": 94960 - }, - { - "epoch": 0.8395657631853463, - "grad_norm": 1.6717872619628906, - "learning_rate": 3.600723728024423e-05, - "loss": 0.6276, - "step": 94970 - }, - { - "epoch": 0.8396541664456585, - "grad_norm": 9.336103439331055, - "learning_rate": 3.600576389257236e-05, - "loss": 0.6517, - "step": 94980 - }, - { - "epoch": 0.8397425697059707, - "grad_norm": 2.040410041809082, - "learning_rate": 3.600429050490049e-05, - "loss": 0.5953, - "step": 94990 - }, - { - "epoch": 0.839830972966283, - "grad_norm": 3.3423497676849365, - "learning_rate": 3.600281711722862e-05, - "loss": 0.683, - "step": 95000 - }, - { - "epoch": 0.8399193762265952, - "grad_norm": 1.4555820226669312, - "learning_rate": 3.600134372955675e-05, - "loss": 0.5977, - "step": 95010 - }, - { - "epoch": 0.8400077794869075, - "grad_norm": 5.59188175201416, - "learning_rate": 3.599987034188488e-05, - "loss": 0.7248, - "step": 95020 - }, - { - "epoch": 0.8400961827472198, - "grad_norm": 3.424687147140503, - "learning_rate": 3.5998396954213006e-05, - "loss": 0.5415, - "step": 95030 - }, - { - "epoch": 0.840184586007532, - "grad_norm": 11.723047256469727, - "learning_rate": 3.5996923566541135e-05, - "loss": 0.5556, - "step": 95040 - }, - { - "epoch": 0.8402729892678442, - "grad_norm": 1.008385419845581, - "learning_rate": 3.599545017886927e-05, - "loss": 0.546, - "step": 95050 - }, - { - "epoch": 0.8403613925281564, - "grad_norm": 10.497899055480957, - "learning_rate": 3.599397679119739e-05, - "loss": 0.8233, - "step": 95060 - }, - { - "epoch": 0.8404497957884687, - "grad_norm": 5.900903224945068, - "learning_rate": 3.599250340352553e-05, - "loss": 0.6416, - "step": 95070 - }, - { - "epoch": 0.8405381990487809, - "grad_norm": 1.6267361640930176, - "learning_rate": 3.5991030015853655e-05, - "loss": 0.5124, - "step": 95080 - }, - { - "epoch": 0.8406266023090931, - "grad_norm": 1.2869590520858765, - "learning_rate": 3.598955662818178e-05, - "loss": 0.5858, - "step": 95090 - }, - { - "epoch": 0.8407150055694054, - "grad_norm": 1.0540703535079956, - "learning_rate": 3.598808324050991e-05, - "loss": 0.6063, - "step": 95100 - }, - { - "epoch": 0.8408034088297176, - "grad_norm": 8.46433162689209, - "learning_rate": 3.598660985283805e-05, - "loss": 0.6762, - "step": 95110 - }, - { - "epoch": 0.8408918120900298, - "grad_norm": 2.2610857486724854, - "learning_rate": 3.598513646516617e-05, - "loss": 0.6093, - "step": 95120 - }, - { - "epoch": 0.8409802153503422, - "grad_norm": 4.000816822052002, - "learning_rate": 3.5983663077494304e-05, - "loss": 0.5154, - "step": 95130 - }, - { - "epoch": 0.8410686186106544, - "grad_norm": 3.9335789680480957, - "learning_rate": 3.5982189689822425e-05, - "loss": 0.6145, - "step": 95140 - }, - { - "epoch": 0.8411570218709666, - "grad_norm": 2.7194511890411377, - "learning_rate": 3.598071630215056e-05, - "loss": 0.5529, - "step": 95150 - }, - { - "epoch": 0.8412454251312789, - "grad_norm": 13.901379585266113, - "learning_rate": 3.597924291447869e-05, - "loss": 0.6166, - "step": 95160 - }, - { - "epoch": 0.8413338283915911, - "grad_norm": 6.461185932159424, - "learning_rate": 3.597776952680682e-05, - "loss": 0.678, - "step": 95170 - }, - { - "epoch": 0.8414222316519033, - "grad_norm": 5.392058372497559, - "learning_rate": 3.5976296139134945e-05, - "loss": 0.66, - "step": 95180 - }, - { - "epoch": 0.8415106349122156, - "grad_norm": 4.977040767669678, - "learning_rate": 3.597482275146308e-05, - "loss": 0.62, - "step": 95190 - }, - { - "epoch": 0.8415990381725278, - "grad_norm": 3.355560302734375, - "learning_rate": 3.59733493637912e-05, - "loss": 0.6605, - "step": 95200 - }, - { - "epoch": 0.84168744143284, - "grad_norm": 2.369241714477539, - "learning_rate": 3.597187597611934e-05, - "loss": 0.7076, - "step": 95210 - }, - { - "epoch": 0.8417758446931523, - "grad_norm": 10.198139190673828, - "learning_rate": 3.5970402588447466e-05, - "loss": 0.6616, - "step": 95220 - }, - { - "epoch": 0.8418642479534645, - "grad_norm": 5.111310005187988, - "learning_rate": 3.5968929200775594e-05, - "loss": 0.6097, - "step": 95230 - }, - { - "epoch": 0.8419526512137767, - "grad_norm": 3.880850076675415, - "learning_rate": 3.596745581310372e-05, - "loss": 0.6163, - "step": 95240 - }, - { - "epoch": 0.8420410544740891, - "grad_norm": 2.7748265266418457, - "learning_rate": 3.596598242543185e-05, - "loss": 0.7044, - "step": 95250 - }, - { - "epoch": 0.8421294577344013, - "grad_norm": 1.8967853784561157, - "learning_rate": 3.596450903775998e-05, - "loss": 0.6364, - "step": 95260 - }, - { - "epoch": 0.8422178609947135, - "grad_norm": 2.3596765995025635, - "learning_rate": 3.5963035650088114e-05, - "loss": 0.6644, - "step": 95270 - }, - { - "epoch": 0.8423062642550257, - "grad_norm": 1.3654931783676147, - "learning_rate": 3.5961562262416236e-05, - "loss": 0.5585, - "step": 95280 - }, - { - "epoch": 0.842394667515338, - "grad_norm": 3.444798231124878, - "learning_rate": 3.596008887474437e-05, - "loss": 0.639, - "step": 95290 - }, - { - "epoch": 0.8424830707756502, - "grad_norm": 2.277906656265259, - "learning_rate": 3.59586154870725e-05, - "loss": 0.609, - "step": 95300 - }, - { - "epoch": 0.8425714740359624, - "grad_norm": 7.962225437164307, - "learning_rate": 3.595714209940063e-05, - "loss": 0.5751, - "step": 95310 - }, - { - "epoch": 0.8426598772962747, - "grad_norm": 9.873428344726562, - "learning_rate": 3.5955668711728756e-05, - "loss": 0.7628, - "step": 95320 - }, - { - "epoch": 0.8427482805565869, - "grad_norm": 6.623358249664307, - "learning_rate": 3.595419532405689e-05, - "loss": 0.6902, - "step": 95330 - }, - { - "epoch": 0.8428366838168991, - "grad_norm": 0.8233844637870789, - "learning_rate": 3.595272193638501e-05, - "loss": 0.5426, - "step": 95340 - }, - { - "epoch": 0.8429250870772114, - "grad_norm": 5.654267311096191, - "learning_rate": 3.595124854871315e-05, - "loss": 0.795, - "step": 95350 - }, - { - "epoch": 0.8430134903375236, - "grad_norm": 4.271440029144287, - "learning_rate": 3.5949775161041276e-05, - "loss": 0.6115, - "step": 95360 - }, - { - "epoch": 0.8431018935978359, - "grad_norm": 3.573516607284546, - "learning_rate": 3.5948301773369404e-05, - "loss": 0.6523, - "step": 95370 - }, - { - "epoch": 0.8431902968581482, - "grad_norm": 2.437706470489502, - "learning_rate": 3.594682838569753e-05, - "loss": 0.7475, - "step": 95380 - }, - { - "epoch": 0.8432787001184604, - "grad_norm": 2.68989634513855, - "learning_rate": 3.594535499802566e-05, - "loss": 0.6564, - "step": 95390 - }, - { - "epoch": 0.8433671033787726, - "grad_norm": 3.355306386947632, - "learning_rate": 3.594388161035379e-05, - "loss": 0.7606, - "step": 95400 - }, - { - "epoch": 0.8434555066390849, - "grad_norm": 2.803866386413574, - "learning_rate": 3.5942408222681925e-05, - "loss": 0.5561, - "step": 95410 - }, - { - "epoch": 0.8435439098993971, - "grad_norm": 11.35826301574707, - "learning_rate": 3.5940934835010046e-05, - "loss": 0.6679, - "step": 95420 - }, - { - "epoch": 0.8436323131597093, - "grad_norm": 2.249281167984009, - "learning_rate": 3.593946144733818e-05, - "loss": 0.5811, - "step": 95430 - }, - { - "epoch": 0.8437207164200216, - "grad_norm": 3.558587074279785, - "learning_rate": 3.593798805966631e-05, - "loss": 0.6888, - "step": 95440 - }, - { - "epoch": 0.8438091196803338, - "grad_norm": 5.050012111663818, - "learning_rate": 3.593651467199444e-05, - "loss": 0.6763, - "step": 95450 - }, - { - "epoch": 0.843897522940646, - "grad_norm": 2.9509243965148926, - "learning_rate": 3.5935041284322566e-05, - "loss": 0.5862, - "step": 95460 - }, - { - "epoch": 0.8439859262009582, - "grad_norm": 8.164398193359375, - "learning_rate": 3.59335678966507e-05, - "loss": 0.5383, - "step": 95470 - }, - { - "epoch": 0.8440743294612705, - "grad_norm": 4.784492492675781, - "learning_rate": 3.593209450897882e-05, - "loss": 0.6068, - "step": 95480 - }, - { - "epoch": 0.8441627327215828, - "grad_norm": 1.5843349695205688, - "learning_rate": 3.593062112130696e-05, - "loss": 0.6065, - "step": 95490 - }, - { - "epoch": 0.844251135981895, - "grad_norm": 1.4645557403564453, - "learning_rate": 3.592914773363508e-05, - "loss": 0.5672, - "step": 95500 - }, - { - "epoch": 0.8443395392422073, - "grad_norm": 2.9640069007873535, - "learning_rate": 3.5927674345963215e-05, - "loss": 0.6791, - "step": 95510 - }, - { - "epoch": 0.8444279425025195, - "grad_norm": 5.711122989654541, - "learning_rate": 3.592620095829134e-05, - "loss": 0.5923, - "step": 95520 - }, - { - "epoch": 0.8445163457628317, - "grad_norm": 1.127500295639038, - "learning_rate": 3.592472757061947e-05, - "loss": 0.6712, - "step": 95530 - }, - { - "epoch": 0.844604749023144, - "grad_norm": 8.827896118164062, - "learning_rate": 3.59232541829476e-05, - "loss": 0.6406, - "step": 95540 - }, - { - "epoch": 0.8446931522834562, - "grad_norm": 4.571467399597168, - "learning_rate": 3.5921780795275735e-05, - "loss": 0.7155, - "step": 95550 - }, - { - "epoch": 0.8447815555437684, - "grad_norm": 2.5723965167999268, - "learning_rate": 3.592030740760386e-05, - "loss": 0.6263, - "step": 95560 - }, - { - "epoch": 0.8448699588040807, - "grad_norm": 5.320863246917725, - "learning_rate": 3.591883401993199e-05, - "loss": 0.6627, - "step": 95570 - }, - { - "epoch": 0.8449583620643929, - "grad_norm": 3.796985149383545, - "learning_rate": 3.591736063226012e-05, - "loss": 0.6255, - "step": 95580 - }, - { - "epoch": 0.8450467653247051, - "grad_norm": 6.235360622406006, - "learning_rate": 3.591588724458825e-05, - "loss": 0.7415, - "step": 95590 - }, - { - "epoch": 0.8451351685850174, - "grad_norm": 4.304016590118408, - "learning_rate": 3.591441385691638e-05, - "loss": 0.6035, - "step": 95600 - }, - { - "epoch": 0.8452235718453297, - "grad_norm": 1.7260944843292236, - "learning_rate": 3.5912940469244505e-05, - "loss": 0.7029, - "step": 95610 - }, - { - "epoch": 0.8453119751056419, - "grad_norm": 10.873573303222656, - "learning_rate": 3.5911467081572634e-05, - "loss": 0.5789, - "step": 95620 - }, - { - "epoch": 0.8454003783659542, - "grad_norm": 3.0364582538604736, - "learning_rate": 3.590999369390077e-05, - "loss": 0.6372, - "step": 95630 - }, - { - "epoch": 0.8454887816262664, - "grad_norm": 3.8201138973236084, - "learning_rate": 3.590852030622889e-05, - "loss": 0.6013, - "step": 95640 - }, - { - "epoch": 0.8455771848865786, - "grad_norm": 6.827417850494385, - "learning_rate": 3.5907046918557025e-05, - "loss": 0.6801, - "step": 95650 - }, - { - "epoch": 0.8456655881468909, - "grad_norm": 2.6591992378234863, - "learning_rate": 3.5905573530885154e-05, - "loss": 0.6349, - "step": 95660 - }, - { - "epoch": 0.8457539914072031, - "grad_norm": 7.244960308074951, - "learning_rate": 3.590410014321328e-05, - "loss": 0.6183, - "step": 95670 - }, - { - "epoch": 0.8458423946675153, - "grad_norm": 3.011967420578003, - "learning_rate": 3.590262675554141e-05, - "loss": 0.5659, - "step": 95680 - }, - { - "epoch": 0.8459307979278275, - "grad_norm": 2.5808794498443604, - "learning_rate": 3.5901153367869546e-05, - "loss": 0.7281, - "step": 95690 - }, - { - "epoch": 0.8460192011881398, - "grad_norm": 2.2106897830963135, - "learning_rate": 3.589967998019767e-05, - "loss": 0.6309, - "step": 95700 - }, - { - "epoch": 0.846107604448452, - "grad_norm": 13.006103515625, - "learning_rate": 3.58982065925258e-05, - "loss": 0.799, - "step": 95710 - }, - { - "epoch": 0.8461960077087644, - "grad_norm": 2.7790842056274414, - "learning_rate": 3.589673320485393e-05, - "loss": 0.6459, - "step": 95720 - }, - { - "epoch": 0.8462844109690766, - "grad_norm": 8.42682933807373, - "learning_rate": 3.589525981718206e-05, - "loss": 0.6586, - "step": 95730 - }, - { - "epoch": 0.8463728142293888, - "grad_norm": 3.0759105682373047, - "learning_rate": 3.589378642951019e-05, - "loss": 0.7381, - "step": 95740 - }, - { - "epoch": 0.846461217489701, - "grad_norm": 7.410990238189697, - "learning_rate": 3.5892313041838316e-05, - "loss": 0.7721, - "step": 95750 - }, - { - "epoch": 0.8465496207500133, - "grad_norm": 2.3688104152679443, - "learning_rate": 3.5890839654166444e-05, - "loss": 0.6294, - "step": 95760 - }, - { - "epoch": 0.8466380240103255, - "grad_norm": 1.406822919845581, - "learning_rate": 3.588936626649458e-05, - "loss": 0.6041, - "step": 95770 - }, - { - "epoch": 0.8467264272706377, - "grad_norm": 5.143759727478027, - "learning_rate": 3.588789287882271e-05, - "loss": 0.5975, - "step": 95780 - }, - { - "epoch": 0.84681483053095, - "grad_norm": 3.095275640487671, - "learning_rate": 3.5886419491150836e-05, - "loss": 0.5834, - "step": 95790 - }, - { - "epoch": 0.8469032337912622, - "grad_norm": 2.8376457691192627, - "learning_rate": 3.5884946103478964e-05, - "loss": 0.771, - "step": 95800 - }, - { - "epoch": 0.8469916370515744, - "grad_norm": 0.9358116388320923, - "learning_rate": 3.588347271580709e-05, - "loss": 0.5959, - "step": 95810 - }, - { - "epoch": 0.8470800403118867, - "grad_norm": 1.056120753288269, - "learning_rate": 3.588199932813522e-05, - "loss": 0.5906, - "step": 95820 - }, - { - "epoch": 0.8471684435721989, - "grad_norm": 6.234168529510498, - "learning_rate": 3.5880525940463356e-05, - "loss": 0.6878, - "step": 95830 - }, - { - "epoch": 0.8472568468325112, - "grad_norm": 4.215854167938232, - "learning_rate": 3.5879052552791484e-05, - "loss": 0.6, - "step": 95840 - }, - { - "epoch": 0.8473452500928235, - "grad_norm": 3.6773717403411865, - "learning_rate": 3.587757916511961e-05, - "loss": 0.5118, - "step": 95850 - }, - { - "epoch": 0.8474336533531357, - "grad_norm": 2.5729904174804688, - "learning_rate": 3.587610577744774e-05, - "loss": 0.6244, - "step": 95860 - }, - { - "epoch": 0.8475220566134479, - "grad_norm": 6.450924396514893, - "learning_rate": 3.587463238977587e-05, - "loss": 0.7929, - "step": 95870 - }, - { - "epoch": 0.8476104598737602, - "grad_norm": 3.9789950847625732, - "learning_rate": 3.5873159002104e-05, - "loss": 0.8084, - "step": 95880 - }, - { - "epoch": 0.8476988631340724, - "grad_norm": 1.5721575021743774, - "learning_rate": 3.5871685614432126e-05, - "loss": 0.5535, - "step": 95890 - }, - { - "epoch": 0.8477872663943846, - "grad_norm": 1.7342270612716675, - "learning_rate": 3.587021222676026e-05, - "loss": 0.5874, - "step": 95900 - }, - { - "epoch": 0.8478756696546969, - "grad_norm": 1.1704521179199219, - "learning_rate": 3.586873883908839e-05, - "loss": 0.6115, - "step": 95910 - }, - { - "epoch": 0.8479640729150091, - "grad_norm": 3.590111255645752, - "learning_rate": 3.586726545141652e-05, - "loss": 0.6238, - "step": 95920 - }, - { - "epoch": 0.8480524761753213, - "grad_norm": 1.3525285720825195, - "learning_rate": 3.5865792063744646e-05, - "loss": 0.6814, - "step": 95930 - }, - { - "epoch": 0.8481408794356335, - "grad_norm": 5.288671016693115, - "learning_rate": 3.5864318676072775e-05, - "loss": 0.6686, - "step": 95940 - }, - { - "epoch": 0.8482292826959458, - "grad_norm": 6.754096508026123, - "learning_rate": 3.58628452884009e-05, - "loss": 0.6281, - "step": 95950 - }, - { - "epoch": 0.8483176859562581, - "grad_norm": 3.5103559494018555, - "learning_rate": 3.586137190072904e-05, - "loss": 0.6366, - "step": 95960 - }, - { - "epoch": 0.8484060892165703, - "grad_norm": 8.585774421691895, - "learning_rate": 3.585989851305716e-05, - "loss": 0.684, - "step": 95970 - }, - { - "epoch": 0.8484944924768826, - "grad_norm": 7.428455829620361, - "learning_rate": 3.5858425125385295e-05, - "loss": 0.5793, - "step": 95980 - }, - { - "epoch": 0.8485828957371948, - "grad_norm": 7.0970916748046875, - "learning_rate": 3.585695173771342e-05, - "loss": 0.5722, - "step": 95990 - }, - { - "epoch": 0.848671298997507, - "grad_norm": 6.8665337562561035, - "learning_rate": 3.585547835004155e-05, - "loss": 0.527, - "step": 96000 - }, - { - "epoch": 0.8487597022578193, - "grad_norm": 1.715965986251831, - "learning_rate": 3.585400496236968e-05, - "loss": 0.6227, - "step": 96010 - }, - { - "epoch": 0.8488481055181315, - "grad_norm": 2.6507320404052734, - "learning_rate": 3.5852531574697815e-05, - "loss": 0.7047, - "step": 96020 - }, - { - "epoch": 0.8489365087784437, - "grad_norm": 7.443882942199707, - "learning_rate": 3.585105818702594e-05, - "loss": 0.5848, - "step": 96030 - }, - { - "epoch": 0.849024912038756, - "grad_norm": 20.649703979492188, - "learning_rate": 3.584958479935407e-05, - "loss": 0.7186, - "step": 96040 - }, - { - "epoch": 0.8491133152990682, - "grad_norm": 4.864152908325195, - "learning_rate": 3.58481114116822e-05, - "loss": 0.6077, - "step": 96050 - }, - { - "epoch": 0.8492017185593804, - "grad_norm": 4.269893646240234, - "learning_rate": 3.584663802401033e-05, - "loss": 0.6816, - "step": 96060 - }, - { - "epoch": 0.8492901218196927, - "grad_norm": 2.7612497806549072, - "learning_rate": 3.584516463633846e-05, - "loss": 0.6619, - "step": 96070 - }, - { - "epoch": 0.849378525080005, - "grad_norm": 3.4402577877044678, - "learning_rate": 3.5843691248666585e-05, - "loss": 0.6352, - "step": 96080 - }, - { - "epoch": 0.8494669283403172, - "grad_norm": 2.4338760375976562, - "learning_rate": 3.5842217860994714e-05, - "loss": 0.7769, - "step": 96090 - }, - { - "epoch": 0.8495553316006295, - "grad_norm": 2.6479580402374268, - "learning_rate": 3.584074447332285e-05, - "loss": 0.6805, - "step": 96100 - }, - { - "epoch": 0.8496437348609417, - "grad_norm": 1.8810317516326904, - "learning_rate": 3.583927108565097e-05, - "loss": 0.5493, - "step": 96110 - }, - { - "epoch": 0.8497321381212539, - "grad_norm": 2.2300667762756348, - "learning_rate": 3.5837797697979105e-05, - "loss": 0.6758, - "step": 96120 - }, - { - "epoch": 0.8498205413815662, - "grad_norm": 1.3616703748703003, - "learning_rate": 3.5836324310307234e-05, - "loss": 0.7226, - "step": 96130 - }, - { - "epoch": 0.8499089446418784, - "grad_norm": 2.228010892868042, - "learning_rate": 3.583485092263536e-05, - "loss": 0.6333, - "step": 96140 - }, - { - "epoch": 0.8499973479021906, - "grad_norm": 1.386564016342163, - "learning_rate": 3.583337753496349e-05, - "loss": 0.6214, - "step": 96150 - }, - { - "epoch": 0.8500857511625028, - "grad_norm": 3.9845974445343018, - "learning_rate": 3.5831904147291626e-05, - "loss": 0.7496, - "step": 96160 - }, - { - "epoch": 0.8501741544228151, - "grad_norm": 2.2014803886413574, - "learning_rate": 3.583043075961975e-05, - "loss": 0.7283, - "step": 96170 - }, - { - "epoch": 0.8502625576831273, - "grad_norm": 1.7963542938232422, - "learning_rate": 3.582895737194788e-05, - "loss": 0.5877, - "step": 96180 - }, - { - "epoch": 0.8503509609434396, - "grad_norm": 1.5569453239440918, - "learning_rate": 3.5827483984276004e-05, - "loss": 0.5518, - "step": 96190 - }, - { - "epoch": 0.8504393642037519, - "grad_norm": 2.659372329711914, - "learning_rate": 3.582601059660414e-05, - "loss": 0.6012, - "step": 96200 - }, - { - "epoch": 0.8505277674640641, - "grad_norm": 1.3436371088027954, - "learning_rate": 3.582453720893227e-05, - "loss": 0.6314, - "step": 96210 - }, - { - "epoch": 0.8506161707243763, - "grad_norm": 1.939951777458191, - "learning_rate": 3.5823063821260396e-05, - "loss": 0.7213, - "step": 96220 - }, - { - "epoch": 0.8507045739846886, - "grad_norm": 1.5260087251663208, - "learning_rate": 3.5821590433588524e-05, - "loss": 0.7606, - "step": 96230 - }, - { - "epoch": 0.8507929772450008, - "grad_norm": 3.363067865371704, - "learning_rate": 3.582011704591666e-05, - "loss": 0.6936, - "step": 96240 - }, - { - "epoch": 0.850881380505313, - "grad_norm": 2.567561149597168, - "learning_rate": 3.581864365824478e-05, - "loss": 0.7035, - "step": 96250 - }, - { - "epoch": 0.8509697837656253, - "grad_norm": 2.520526170730591, - "learning_rate": 3.5817170270572916e-05, - "loss": 0.6729, - "step": 96260 - }, - { - "epoch": 0.8510581870259375, - "grad_norm": 16.309490203857422, - "learning_rate": 3.5815696882901044e-05, - "loss": 0.6256, - "step": 96270 - }, - { - "epoch": 0.8511465902862497, - "grad_norm": 2.2490227222442627, - "learning_rate": 3.581422349522917e-05, - "loss": 0.4601, - "step": 96280 - }, - { - "epoch": 0.851234993546562, - "grad_norm": 2.0835492610931396, - "learning_rate": 3.58127501075573e-05, - "loss": 0.5824, - "step": 96290 - }, - { - "epoch": 0.8513233968068742, - "grad_norm": 1.6873316764831543, - "learning_rate": 3.5811276719885436e-05, - "loss": 0.6356, - "step": 96300 - }, - { - "epoch": 0.8514118000671865, - "grad_norm": 2.128718614578247, - "learning_rate": 3.580980333221356e-05, - "loss": 0.601, - "step": 96310 - }, - { - "epoch": 0.8515002033274988, - "grad_norm": 2.577293872833252, - "learning_rate": 3.580832994454169e-05, - "loss": 0.5732, - "step": 96320 - }, - { - "epoch": 0.851588606587811, - "grad_norm": 2.8871302604675293, - "learning_rate": 3.5806856556869814e-05, - "loss": 0.6314, - "step": 96330 - }, - { - "epoch": 0.8516770098481232, - "grad_norm": 1.9625697135925293, - "learning_rate": 3.580538316919795e-05, - "loss": 0.6238, - "step": 96340 - }, - { - "epoch": 0.8517654131084355, - "grad_norm": 3.150153636932373, - "learning_rate": 3.580390978152608e-05, - "loss": 0.5864, - "step": 96350 - }, - { - "epoch": 0.8518538163687477, - "grad_norm": 3.8638978004455566, - "learning_rate": 3.5802436393854206e-05, - "loss": 0.7103, - "step": 96360 - }, - { - "epoch": 0.8519422196290599, - "grad_norm": 3.1687910556793213, - "learning_rate": 3.5800963006182335e-05, - "loss": 0.713, - "step": 96370 - }, - { - "epoch": 0.8520306228893721, - "grad_norm": 2.8374810218811035, - "learning_rate": 3.579948961851047e-05, - "loss": 0.5735, - "step": 96380 - }, - { - "epoch": 0.8521190261496844, - "grad_norm": 1.7191070318222046, - "learning_rate": 3.579801623083859e-05, - "loss": 0.7229, - "step": 96390 - }, - { - "epoch": 0.8522074294099966, - "grad_norm": 3.8795509338378906, - "learning_rate": 3.5796542843166726e-05, - "loss": 0.7042, - "step": 96400 - }, - { - "epoch": 0.8522958326703088, - "grad_norm": 4.817535877227783, - "learning_rate": 3.5795069455494855e-05, - "loss": 0.67, - "step": 96410 - }, - { - "epoch": 0.8523842359306211, - "grad_norm": 4.528602123260498, - "learning_rate": 3.579359606782298e-05, - "loss": 0.681, - "step": 96420 - }, - { - "epoch": 0.8524726391909334, - "grad_norm": 10.816837310791016, - "learning_rate": 3.579212268015111e-05, - "loss": 0.676, - "step": 96430 - }, - { - "epoch": 0.8525610424512456, - "grad_norm": 1.0233919620513916, - "learning_rate": 3.579064929247924e-05, - "loss": 0.5836, - "step": 96440 - }, - { - "epoch": 0.8526494457115579, - "grad_norm": 4.109862327575684, - "learning_rate": 3.578917590480737e-05, - "loss": 0.6727, - "step": 96450 - }, - { - "epoch": 0.8527378489718701, - "grad_norm": 3.049077033996582, - "learning_rate": 3.57877025171355e-05, - "loss": 0.5985, - "step": 96460 - }, - { - "epoch": 0.8528262522321823, - "grad_norm": 2.6511070728302, - "learning_rate": 3.5786229129463625e-05, - "loss": 0.6227, - "step": 96470 - }, - { - "epoch": 0.8529146554924946, - "grad_norm": 2.297149658203125, - "learning_rate": 3.578475574179176e-05, - "loss": 0.6207, - "step": 96480 - }, - { - "epoch": 0.8530030587528068, - "grad_norm": 7.095157623291016, - "learning_rate": 3.578328235411989e-05, - "loss": 0.6466, - "step": 96490 - }, - { - "epoch": 0.853091462013119, - "grad_norm": 16.811241149902344, - "learning_rate": 3.578180896644802e-05, - "loss": 0.6613, - "step": 96500 - }, - { - "epoch": 0.8531798652734313, - "grad_norm": 1.6061618328094482, - "learning_rate": 3.5780335578776145e-05, - "loss": 0.637, - "step": 96510 - }, - { - "epoch": 0.8532682685337435, - "grad_norm": 1.3366936445236206, - "learning_rate": 3.577886219110428e-05, - "loss": 0.7036, - "step": 96520 - }, - { - "epoch": 0.8533566717940557, - "grad_norm": 1.4037444591522217, - "learning_rate": 3.57773888034324e-05, - "loss": 0.6635, - "step": 96530 - }, - { - "epoch": 0.853445075054368, - "grad_norm": 3.01778507232666, - "learning_rate": 3.577591541576054e-05, - "loss": 0.747, - "step": 96540 - }, - { - "epoch": 0.8535334783146803, - "grad_norm": 4.420552730560303, - "learning_rate": 3.577444202808866e-05, - "loss": 0.7034, - "step": 96550 - }, - { - "epoch": 0.8536218815749925, - "grad_norm": 7.216592788696289, - "learning_rate": 3.5772968640416794e-05, - "loss": 0.5377, - "step": 96560 - }, - { - "epoch": 0.8537102848353048, - "grad_norm": 1.2200016975402832, - "learning_rate": 3.577149525274492e-05, - "loss": 0.7169, - "step": 96570 - }, - { - "epoch": 0.853798688095617, - "grad_norm": 2.5889317989349365, - "learning_rate": 3.577002186507305e-05, - "loss": 0.726, - "step": 96580 - }, - { - "epoch": 0.8538870913559292, - "grad_norm": 3.7994658946990967, - "learning_rate": 3.576854847740118e-05, - "loss": 0.6724, - "step": 96590 - }, - { - "epoch": 0.8539754946162414, - "grad_norm": 3.08121919631958, - "learning_rate": 3.5767075089729314e-05, - "loss": 0.5426, - "step": 96600 - }, - { - "epoch": 0.8540638978765537, - "grad_norm": 4.825959205627441, - "learning_rate": 3.5765601702057435e-05, - "loss": 0.6906, - "step": 96610 - }, - { - "epoch": 0.8541523011368659, - "grad_norm": 3.4976179599761963, - "learning_rate": 3.576412831438557e-05, - "loss": 0.6838, - "step": 96620 - }, - { - "epoch": 0.8542407043971781, - "grad_norm": 9.864885330200195, - "learning_rate": 3.57626549267137e-05, - "loss": 0.6538, - "step": 96630 - }, - { - "epoch": 0.8543291076574904, - "grad_norm": 2.7881863117218018, - "learning_rate": 3.576118153904183e-05, - "loss": 0.6598, - "step": 96640 - }, - { - "epoch": 0.8544175109178026, - "grad_norm": 2.6809630393981934, - "learning_rate": 3.5759708151369956e-05, - "loss": 0.6609, - "step": 96650 - }, - { - "epoch": 0.8545059141781148, - "grad_norm": 6.943284511566162, - "learning_rate": 3.575823476369809e-05, - "loss": 0.6044, - "step": 96660 - }, - { - "epoch": 0.8545943174384272, - "grad_norm": 2.844498872756958, - "learning_rate": 3.575676137602621e-05, - "loss": 0.65, - "step": 96670 - }, - { - "epoch": 0.8546827206987394, - "grad_norm": 1.6513187885284424, - "learning_rate": 3.575528798835435e-05, - "loss": 0.6576, - "step": 96680 - }, - { - "epoch": 0.8547711239590516, - "grad_norm": 1.2408981323242188, - "learning_rate": 3.5753814600682476e-05, - "loss": 0.6423, - "step": 96690 - }, - { - "epoch": 0.8548595272193639, - "grad_norm": 3.5790164470672607, - "learning_rate": 3.5752341213010604e-05, - "loss": 0.6824, - "step": 96700 - }, - { - "epoch": 0.8549479304796761, - "grad_norm": 1.7619749307632446, - "learning_rate": 3.575086782533873e-05, - "loss": 0.6511, - "step": 96710 - }, - { - "epoch": 0.8550363337399883, - "grad_norm": 5.983351707458496, - "learning_rate": 3.574939443766686e-05, - "loss": 0.7705, - "step": 96720 - }, - { - "epoch": 0.8551247370003006, - "grad_norm": 2.7571675777435303, - "learning_rate": 3.574792104999499e-05, - "loss": 0.6756, - "step": 96730 - }, - { - "epoch": 0.8552131402606128, - "grad_norm": 2.4864935874938965, - "learning_rate": 3.5746447662323124e-05, - "loss": 0.5314, - "step": 96740 - }, - { - "epoch": 0.855301543520925, - "grad_norm": 5.254898548126221, - "learning_rate": 3.574497427465125e-05, - "loss": 0.7719, - "step": 96750 - }, - { - "epoch": 0.8553899467812373, - "grad_norm": 2.200340747833252, - "learning_rate": 3.574350088697938e-05, - "loss": 0.7374, - "step": 96760 - }, - { - "epoch": 0.8554783500415495, - "grad_norm": 7.612358093261719, - "learning_rate": 3.574202749930751e-05, - "loss": 0.8307, - "step": 96770 - }, - { - "epoch": 0.8555667533018618, - "grad_norm": 3.9470272064208984, - "learning_rate": 3.574055411163564e-05, - "loss": 0.5449, - "step": 96780 - }, - { - "epoch": 0.8556551565621741, - "grad_norm": 4.081993103027344, - "learning_rate": 3.5739080723963766e-05, - "loss": 0.6508, - "step": 96790 - }, - { - "epoch": 0.8557435598224863, - "grad_norm": 2.588196039199829, - "learning_rate": 3.5737607336291895e-05, - "loss": 0.6271, - "step": 96800 - }, - { - "epoch": 0.8558319630827985, - "grad_norm": 3.0945520401000977, - "learning_rate": 3.573613394862003e-05, - "loss": 0.5452, - "step": 96810 - }, - { - "epoch": 0.8559203663431108, - "grad_norm": 7.589072227478027, - "learning_rate": 3.573466056094816e-05, - "loss": 0.7708, - "step": 96820 - }, - { - "epoch": 0.856008769603423, - "grad_norm": 1.6648616790771484, - "learning_rate": 3.5733187173276286e-05, - "loss": 0.5345, - "step": 96830 - }, - { - "epoch": 0.8560971728637352, - "grad_norm": 3.797358989715576, - "learning_rate": 3.5731713785604415e-05, - "loss": 0.6019, - "step": 96840 - }, - { - "epoch": 0.8561855761240474, - "grad_norm": 6.206610679626465, - "learning_rate": 3.573024039793254e-05, - "loss": 0.6814, - "step": 96850 - }, - { - "epoch": 0.8562739793843597, - "grad_norm": 1.350623369216919, - "learning_rate": 3.572876701026067e-05, - "loss": 0.6797, - "step": 96860 - }, - { - "epoch": 0.8563623826446719, - "grad_norm": 4.248816013336182, - "learning_rate": 3.5727293622588807e-05, - "loss": 0.7387, - "step": 96870 - }, - { - "epoch": 0.8564507859049841, - "grad_norm": 1.847179651260376, - "learning_rate": 3.5725820234916935e-05, - "loss": 0.6964, - "step": 96880 - }, - { - "epoch": 0.8565391891652964, - "grad_norm": 2.9017820358276367, - "learning_rate": 3.572434684724506e-05, - "loss": 0.631, - "step": 96890 - }, - { - "epoch": 0.8566275924256087, - "grad_norm": 10.833443641662598, - "learning_rate": 3.572287345957319e-05, - "loss": 0.6257, - "step": 96900 - }, - { - "epoch": 0.8567159956859209, - "grad_norm": 3.561417818069458, - "learning_rate": 3.572140007190132e-05, - "loss": 0.6574, - "step": 96910 - }, - { - "epoch": 0.8568043989462332, - "grad_norm": 0.7631369829177856, - "learning_rate": 3.571992668422945e-05, - "loss": 0.6225, - "step": 96920 - }, - { - "epoch": 0.8568928022065454, - "grad_norm": 6.963668346405029, - "learning_rate": 3.5718453296557583e-05, - "loss": 0.6315, - "step": 96930 - }, - { - "epoch": 0.8569812054668576, - "grad_norm": 1.9811309576034546, - "learning_rate": 3.5716979908885705e-05, - "loss": 0.7407, - "step": 96940 - }, - { - "epoch": 0.8570696087271699, - "grad_norm": 1.2657369375228882, - "learning_rate": 3.571550652121384e-05, - "loss": 0.5438, - "step": 96950 - }, - { - "epoch": 0.8571580119874821, - "grad_norm": 3.8242335319519043, - "learning_rate": 3.571403313354197e-05, - "loss": 0.7064, - "step": 96960 - }, - { - "epoch": 0.8572464152477943, - "grad_norm": 10.111712455749512, - "learning_rate": 3.57125597458701e-05, - "loss": 0.6155, - "step": 96970 - }, - { - "epoch": 0.8573348185081066, - "grad_norm": 1.8088099956512451, - "learning_rate": 3.5711086358198225e-05, - "loss": 0.7048, - "step": 96980 - }, - { - "epoch": 0.8574232217684188, - "grad_norm": 6.075275897979736, - "learning_rate": 3.570961297052636e-05, - "loss": 0.5599, - "step": 96990 - }, - { - "epoch": 0.857511625028731, - "grad_norm": 1.9787936210632324, - "learning_rate": 3.570813958285448e-05, - "loss": 0.7863, - "step": 97000 - }, - { - "epoch": 0.8576000282890432, - "grad_norm": 6.562952518463135, - "learning_rate": 3.570666619518262e-05, - "loss": 0.8221, - "step": 97010 - }, - { - "epoch": 0.8576884315493556, - "grad_norm": 2.168668031692505, - "learning_rate": 3.570519280751074e-05, - "loss": 0.6861, - "step": 97020 - }, - { - "epoch": 0.8577768348096678, - "grad_norm": 6.50564432144165, - "learning_rate": 3.5703719419838874e-05, - "loss": 0.6213, - "step": 97030 - }, - { - "epoch": 0.85786523806998, - "grad_norm": 1.0779777765274048, - "learning_rate": 3.5702246032167e-05, - "loss": 0.5123, - "step": 97040 - }, - { - "epoch": 0.8579536413302923, - "grad_norm": 6.688148498535156, - "learning_rate": 3.570077264449513e-05, - "loss": 0.6077, - "step": 97050 - }, - { - "epoch": 0.8580420445906045, - "grad_norm": 1.3460952043533325, - "learning_rate": 3.569929925682326e-05, - "loss": 0.6679, - "step": 97060 - }, - { - "epoch": 0.8581304478509167, - "grad_norm": 5.407824993133545, - "learning_rate": 3.5697825869151394e-05, - "loss": 0.6355, - "step": 97070 - }, - { - "epoch": 0.858218851111229, - "grad_norm": 8.066926002502441, - "learning_rate": 3.5696352481479516e-05, - "loss": 0.6606, - "step": 97080 - }, - { - "epoch": 0.8583072543715412, - "grad_norm": 5.549952983856201, - "learning_rate": 3.569487909380765e-05, - "loss": 0.6487, - "step": 97090 - }, - { - "epoch": 0.8583956576318534, - "grad_norm": 3.47602915763855, - "learning_rate": 3.569340570613578e-05, - "loss": 0.6682, - "step": 97100 - }, - { - "epoch": 0.8584840608921657, - "grad_norm": 2.4410276412963867, - "learning_rate": 3.569193231846391e-05, - "loss": 0.6071, - "step": 97110 - }, - { - "epoch": 0.8585724641524779, - "grad_norm": 1.9685924053192139, - "learning_rate": 3.5690458930792036e-05, - "loss": 0.7168, - "step": 97120 - }, - { - "epoch": 0.8586608674127901, - "grad_norm": 5.951869964599609, - "learning_rate": 3.568898554312017e-05, - "loss": 0.8174, - "step": 97130 - }, - { - "epoch": 0.8587492706731025, - "grad_norm": 2.647230863571167, - "learning_rate": 3.568751215544829e-05, - "loss": 0.5943, - "step": 97140 - }, - { - "epoch": 0.8588376739334147, - "grad_norm": 4.039038181304932, - "learning_rate": 3.568603876777643e-05, - "loss": 0.7293, - "step": 97150 - }, - { - "epoch": 0.8589260771937269, - "grad_norm": 3.268256664276123, - "learning_rate": 3.568456538010455e-05, - "loss": 0.7301, - "step": 97160 - }, - { - "epoch": 0.8590144804540392, - "grad_norm": 2.228330135345459, - "learning_rate": 3.5683091992432684e-05, - "loss": 0.5719, - "step": 97170 - }, - { - "epoch": 0.8591028837143514, - "grad_norm": 2.9342756271362305, - "learning_rate": 3.568161860476081e-05, - "loss": 0.5967, - "step": 97180 - }, - { - "epoch": 0.8591912869746636, - "grad_norm": 1.1431633234024048, - "learning_rate": 3.568014521708894e-05, - "loss": 0.6203, - "step": 97190 - }, - { - "epoch": 0.8592796902349759, - "grad_norm": 3.2620229721069336, - "learning_rate": 3.567867182941707e-05, - "loss": 0.6046, - "step": 97200 - }, - { - "epoch": 0.8593680934952881, - "grad_norm": 3.122626304626465, - "learning_rate": 3.5677198441745204e-05, - "loss": 0.6107, - "step": 97210 - }, - { - "epoch": 0.8594564967556003, - "grad_norm": 13.108017921447754, - "learning_rate": 3.5675725054073326e-05, - "loss": 0.5778, - "step": 97220 - }, - { - "epoch": 0.8595449000159125, - "grad_norm": 4.802103519439697, - "learning_rate": 3.567425166640146e-05, - "loss": 0.4346, - "step": 97230 - }, - { - "epoch": 0.8596333032762248, - "grad_norm": 12.014464378356934, - "learning_rate": 3.567277827872959e-05, - "loss": 0.6535, - "step": 97240 - }, - { - "epoch": 0.8597217065365371, - "grad_norm": 0.8196065425872803, - "learning_rate": 3.567130489105772e-05, - "loss": 0.7249, - "step": 97250 - }, - { - "epoch": 0.8598101097968494, - "grad_norm": 2.5248751640319824, - "learning_rate": 3.5669831503385846e-05, - "loss": 0.6495, - "step": 97260 - }, - { - "epoch": 0.8598985130571616, - "grad_norm": 2.2289059162139893, - "learning_rate": 3.5668358115713975e-05, - "loss": 0.6034, - "step": 97270 - }, - { - "epoch": 0.8599869163174738, - "grad_norm": 2.478698968887329, - "learning_rate": 3.56668847280421e-05, - "loss": 0.796, - "step": 97280 - }, - { - "epoch": 0.860075319577786, - "grad_norm": 2.4700891971588135, - "learning_rate": 3.566541134037024e-05, - "loss": 0.7364, - "step": 97290 - }, - { - "epoch": 0.8601637228380983, - "grad_norm": 1.2270628213882446, - "learning_rate": 3.566393795269836e-05, - "loss": 0.6514, - "step": 97300 - }, - { - "epoch": 0.8602521260984105, - "grad_norm": 3.766535758972168, - "learning_rate": 3.5662464565026495e-05, - "loss": 0.5957, - "step": 97310 - }, - { - "epoch": 0.8603405293587227, - "grad_norm": 9.726471900939941, - "learning_rate": 3.566099117735462e-05, - "loss": 0.7541, - "step": 97320 - }, - { - "epoch": 0.860428932619035, - "grad_norm": 3.229013681411743, - "learning_rate": 3.565951778968275e-05, - "loss": 0.4931, - "step": 97330 - }, - { - "epoch": 0.8605173358793472, - "grad_norm": 4.683764457702637, - "learning_rate": 3.565804440201088e-05, - "loss": 0.6422, - "step": 97340 - }, - { - "epoch": 0.8606057391396594, - "grad_norm": 2.1976706981658936, - "learning_rate": 3.5656571014339015e-05, - "loss": 0.6622, - "step": 97350 - }, - { - "epoch": 0.8606941423999717, - "grad_norm": 4.245981216430664, - "learning_rate": 3.5655097626667137e-05, - "loss": 0.6518, - "step": 97360 - }, - { - "epoch": 0.860782545660284, - "grad_norm": 1.7333186864852905, - "learning_rate": 3.565362423899527e-05, - "loss": 0.6501, - "step": 97370 - }, - { - "epoch": 0.8608709489205962, - "grad_norm": 1.8159234523773193, - "learning_rate": 3.565215085132339e-05, - "loss": 0.5702, - "step": 97380 - }, - { - "epoch": 0.8609593521809085, - "grad_norm": 10.311392784118652, - "learning_rate": 3.565067746365153e-05, - "loss": 0.7206, - "step": 97390 - }, - { - "epoch": 0.8610477554412207, - "grad_norm": 2.2085134983062744, - "learning_rate": 3.564920407597966e-05, - "loss": 0.7121, - "step": 97400 - }, - { - "epoch": 0.8611361587015329, - "grad_norm": 2.321945905685425, - "learning_rate": 3.5647730688307785e-05, - "loss": 0.6404, - "step": 97410 - }, - { - "epoch": 0.8612245619618452, - "grad_norm": 2.306286573410034, - "learning_rate": 3.5646257300635913e-05, - "loss": 0.6428, - "step": 97420 - }, - { - "epoch": 0.8613129652221574, - "grad_norm": 3.535773992538452, - "learning_rate": 3.564478391296405e-05, - "loss": 0.6817, - "step": 97430 - }, - { - "epoch": 0.8614013684824696, - "grad_norm": 6.402032852172852, - "learning_rate": 3.564331052529217e-05, - "loss": 0.6937, - "step": 97440 - }, - { - "epoch": 0.8614897717427819, - "grad_norm": 2.2330784797668457, - "learning_rate": 3.5641837137620305e-05, - "loss": 0.5796, - "step": 97450 - }, - { - "epoch": 0.8615781750030941, - "grad_norm": 4.886663436889648, - "learning_rate": 3.5640363749948434e-05, - "loss": 0.686, - "step": 97460 - }, - { - "epoch": 0.8616665782634063, - "grad_norm": 2.6288633346557617, - "learning_rate": 3.563889036227656e-05, - "loss": 0.6134, - "step": 97470 - }, - { - "epoch": 0.8617549815237185, - "grad_norm": 12.250592231750488, - "learning_rate": 3.563741697460469e-05, - "loss": 0.708, - "step": 97480 - }, - { - "epoch": 0.8618433847840309, - "grad_norm": 3.854862689971924, - "learning_rate": 3.563594358693282e-05, - "loss": 0.7334, - "step": 97490 - }, - { - "epoch": 0.8619317880443431, - "grad_norm": 4.415807723999023, - "learning_rate": 3.563447019926095e-05, - "loss": 0.6553, - "step": 97500 - }, - { - "epoch": 0.8620201913046553, - "grad_norm": 2.9235692024230957, - "learning_rate": 3.563299681158908e-05, - "loss": 0.6903, - "step": 97510 - }, - { - "epoch": 0.8621085945649676, - "grad_norm": 3.8028318881988525, - "learning_rate": 3.5631523423917204e-05, - "loss": 0.6343, - "step": 97520 - }, - { - "epoch": 0.8621969978252798, - "grad_norm": 6.743467330932617, - "learning_rate": 3.563005003624534e-05, - "loss": 0.635, - "step": 97530 - }, - { - "epoch": 0.862285401085592, - "grad_norm": 1.6084133386611938, - "learning_rate": 3.562857664857347e-05, - "loss": 0.7388, - "step": 97540 - }, - { - "epoch": 0.8623738043459043, - "grad_norm": 10.368175506591797, - "learning_rate": 3.5627103260901596e-05, - "loss": 0.6807, - "step": 97550 - }, - { - "epoch": 0.8624622076062165, - "grad_norm": 12.379048347473145, - "learning_rate": 3.5625629873229724e-05, - "loss": 0.5697, - "step": 97560 - }, - { - "epoch": 0.8625506108665287, - "grad_norm": 3.011819362640381, - "learning_rate": 3.562415648555786e-05, - "loss": 0.6628, - "step": 97570 - }, - { - "epoch": 0.862639014126841, - "grad_norm": 2.7928409576416016, - "learning_rate": 3.562268309788598e-05, - "loss": 0.6936, - "step": 97580 - }, - { - "epoch": 0.8627274173871532, - "grad_norm": 2.027700901031494, - "learning_rate": 3.5621209710214116e-05, - "loss": 0.8069, - "step": 97590 - }, - { - "epoch": 0.8628158206474654, - "grad_norm": 2.958068370819092, - "learning_rate": 3.5619736322542244e-05, - "loss": 0.5896, - "step": 97600 - }, - { - "epoch": 0.8629042239077778, - "grad_norm": 2.9642834663391113, - "learning_rate": 3.561826293487037e-05, - "loss": 0.6321, - "step": 97610 - }, - { - "epoch": 0.86299262716809, - "grad_norm": 1.8911761045455933, - "learning_rate": 3.56167895471985e-05, - "loss": 0.7027, - "step": 97620 - }, - { - "epoch": 0.8630810304284022, - "grad_norm": 2.3212645053863525, - "learning_rate": 3.561531615952663e-05, - "loss": 0.6298, - "step": 97630 - }, - { - "epoch": 0.8631694336887145, - "grad_norm": 5.461788654327393, - "learning_rate": 3.561384277185476e-05, - "loss": 0.5616, - "step": 97640 - }, - { - "epoch": 0.8632578369490267, - "grad_norm": 0.9621568322181702, - "learning_rate": 3.561236938418289e-05, - "loss": 0.6358, - "step": 97650 - }, - { - "epoch": 0.8633462402093389, - "grad_norm": 1.8285093307495117, - "learning_rate": 3.561089599651102e-05, - "loss": 0.6572, - "step": 97660 - }, - { - "epoch": 0.8634346434696512, - "grad_norm": 3.9671151638031006, - "learning_rate": 3.560942260883915e-05, - "loss": 0.595, - "step": 97670 - }, - { - "epoch": 0.8635230467299634, - "grad_norm": 4.02812385559082, - "learning_rate": 3.560794922116728e-05, - "loss": 0.7218, - "step": 97680 - }, - { - "epoch": 0.8636114499902756, - "grad_norm": 3.439793348312378, - "learning_rate": 3.5606475833495406e-05, - "loss": 0.5587, - "step": 97690 - }, - { - "epoch": 0.8636998532505878, - "grad_norm": 6.592374324798584, - "learning_rate": 3.5605002445823534e-05, - "loss": 0.7331, - "step": 97700 - }, - { - "epoch": 0.8637882565109001, - "grad_norm": 1.928765892982483, - "learning_rate": 3.560352905815167e-05, - "loss": 0.689, - "step": 97710 - }, - { - "epoch": 0.8638766597712123, - "grad_norm": 2.3686225414276123, - "learning_rate": 3.56020556704798e-05, - "loss": 0.823, - "step": 97720 - }, - { - "epoch": 0.8639650630315246, - "grad_norm": 2.042149066925049, - "learning_rate": 3.5600582282807926e-05, - "loss": 0.5875, - "step": 97730 - }, - { - "epoch": 0.8640534662918369, - "grad_norm": 3.339426040649414, - "learning_rate": 3.5599108895136055e-05, - "loss": 0.6725, - "step": 97740 - }, - { - "epoch": 0.8641418695521491, - "grad_norm": 1.7333118915557861, - "learning_rate": 3.559763550746418e-05, - "loss": 0.6748, - "step": 97750 - }, - { - "epoch": 0.8642302728124613, - "grad_norm": 4.163698673248291, - "learning_rate": 3.559616211979231e-05, - "loss": 0.6952, - "step": 97760 - }, - { - "epoch": 0.8643186760727736, - "grad_norm": 1.8396464586257935, - "learning_rate": 3.559468873212044e-05, - "loss": 0.6029, - "step": 97770 - }, - { - "epoch": 0.8644070793330858, - "grad_norm": 4.963450908660889, - "learning_rate": 3.5593215344448575e-05, - "loss": 0.7147, - "step": 97780 - }, - { - "epoch": 0.864495482593398, - "grad_norm": 6.767226696014404, - "learning_rate": 3.55917419567767e-05, - "loss": 0.6666, - "step": 97790 - }, - { - "epoch": 0.8645838858537103, - "grad_norm": 7.092291831970215, - "learning_rate": 3.559026856910483e-05, - "loss": 0.5999, - "step": 97800 - }, - { - "epoch": 0.8646722891140225, - "grad_norm": 0.9363610744476318, - "learning_rate": 3.558879518143296e-05, - "loss": 0.5209, - "step": 97810 - }, - { - "epoch": 0.8647606923743347, - "grad_norm": 1.8100072145462036, - "learning_rate": 3.558732179376109e-05, - "loss": 0.6536, - "step": 97820 - }, - { - "epoch": 0.864849095634647, - "grad_norm": 2.556466579437256, - "learning_rate": 3.558584840608922e-05, - "loss": 0.6147, - "step": 97830 - }, - { - "epoch": 0.8649374988949593, - "grad_norm": 2.9554662704467773, - "learning_rate": 3.558437501841735e-05, - "loss": 0.6924, - "step": 97840 - }, - { - "epoch": 0.8650259021552715, - "grad_norm": 2.7062246799468994, - "learning_rate": 3.558290163074547e-05, - "loss": 0.6442, - "step": 97850 - }, - { - "epoch": 0.8651143054155838, - "grad_norm": 1.5560581684112549, - "learning_rate": 3.558142824307361e-05, - "loss": 0.6401, - "step": 97860 - }, - { - "epoch": 0.865202708675896, - "grad_norm": 9.17304515838623, - "learning_rate": 3.557995485540174e-05, - "loss": 0.7126, - "step": 97870 - }, - { - "epoch": 0.8652911119362082, - "grad_norm": 3.902885913848877, - "learning_rate": 3.5578481467729865e-05, - "loss": 0.6815, - "step": 97880 - }, - { - "epoch": 0.8653795151965205, - "grad_norm": 7.74009895324707, - "learning_rate": 3.5577008080057994e-05, - "loss": 0.6422, - "step": 97890 - }, - { - "epoch": 0.8654679184568327, - "grad_norm": 5.859641075134277, - "learning_rate": 3.557553469238613e-05, - "loss": 0.6396, - "step": 97900 - }, - { - "epoch": 0.8655563217171449, - "grad_norm": 4.236804962158203, - "learning_rate": 3.557406130471425e-05, - "loss": 0.5992, - "step": 97910 - }, - { - "epoch": 0.8656447249774571, - "grad_norm": 6.393752574920654, - "learning_rate": 3.5572587917042385e-05, - "loss": 0.5366, - "step": 97920 - }, - { - "epoch": 0.8657331282377694, - "grad_norm": 3.365983247756958, - "learning_rate": 3.5571114529370514e-05, - "loss": 0.7184, - "step": 97930 - }, - { - "epoch": 0.8658215314980816, - "grad_norm": 3.524007797241211, - "learning_rate": 3.556964114169864e-05, - "loss": 0.8267, - "step": 97940 - }, - { - "epoch": 0.8659099347583938, - "grad_norm": 1.3623628616333008, - "learning_rate": 3.556816775402677e-05, - "loss": 0.5764, - "step": 97950 - }, - { - "epoch": 0.8659983380187062, - "grad_norm": 1.6860737800598145, - "learning_rate": 3.55666943663549e-05, - "loss": 0.6355, - "step": 97960 - }, - { - "epoch": 0.8660867412790184, - "grad_norm": 10.27686882019043, - "learning_rate": 3.556522097868303e-05, - "loss": 0.6504, - "step": 97970 - }, - { - "epoch": 0.8661751445393306, - "grad_norm": 2.0946044921875, - "learning_rate": 3.556374759101116e-05, - "loss": 0.6157, - "step": 97980 - }, - { - "epoch": 0.8662635477996429, - "grad_norm": 3.56308913230896, - "learning_rate": 3.5562274203339284e-05, - "loss": 0.5911, - "step": 97990 - }, - { - "epoch": 0.8663519510599551, - "grad_norm": 6.122189044952393, - "learning_rate": 3.556080081566742e-05, - "loss": 0.6597, - "step": 98000 - }, - { - "epoch": 0.8664403543202673, - "grad_norm": 2.2405266761779785, - "learning_rate": 3.555932742799555e-05, - "loss": 0.6425, - "step": 98010 - }, - { - "epoch": 0.8665287575805796, - "grad_norm": 4.223904132843018, - "learning_rate": 3.5557854040323676e-05, - "loss": 0.6877, - "step": 98020 - }, - { - "epoch": 0.8666171608408918, - "grad_norm": 5.597757339477539, - "learning_rate": 3.5556380652651804e-05, - "loss": 0.6597, - "step": 98030 - }, - { - "epoch": 0.866705564101204, - "grad_norm": 6.285776138305664, - "learning_rate": 3.555490726497994e-05, - "loss": 0.7034, - "step": 98040 - }, - { - "epoch": 0.8667939673615163, - "grad_norm": 3.9177229404449463, - "learning_rate": 3.555343387730806e-05, - "loss": 0.6602, - "step": 98050 - }, - { - "epoch": 0.8668823706218285, - "grad_norm": 1.1600656509399414, - "learning_rate": 3.5551960489636196e-05, - "loss": 0.8058, - "step": 98060 - }, - { - "epoch": 0.8669707738821407, - "grad_norm": 3.8851804733276367, - "learning_rate": 3.5550487101964324e-05, - "loss": 0.6659, - "step": 98070 - }, - { - "epoch": 0.8670591771424531, - "grad_norm": 8.677483558654785, - "learning_rate": 3.554901371429245e-05, - "loss": 0.6912, - "step": 98080 - }, - { - "epoch": 0.8671475804027653, - "grad_norm": 2.5365614891052246, - "learning_rate": 3.554754032662058e-05, - "loss": 0.669, - "step": 98090 - }, - { - "epoch": 0.8672359836630775, - "grad_norm": 2.035731077194214, - "learning_rate": 3.554606693894871e-05, - "loss": 0.6094, - "step": 98100 - }, - { - "epoch": 0.8673243869233898, - "grad_norm": 2.49967098236084, - "learning_rate": 3.554459355127684e-05, - "loss": 0.6024, - "step": 98110 - }, - { - "epoch": 0.867412790183702, - "grad_norm": 4.827687740325928, - "learning_rate": 3.554312016360497e-05, - "loss": 0.7174, - "step": 98120 - }, - { - "epoch": 0.8675011934440142, - "grad_norm": 1.857065200805664, - "learning_rate": 3.5541646775933094e-05, - "loss": 0.6971, - "step": 98130 - }, - { - "epoch": 0.8675895967043264, - "grad_norm": 2.1943271160125732, - "learning_rate": 3.554017338826123e-05, - "loss": 0.7467, - "step": 98140 - }, - { - "epoch": 0.8676779999646387, - "grad_norm": 7.98257303237915, - "learning_rate": 3.553870000058936e-05, - "loss": 0.6916, - "step": 98150 - }, - { - "epoch": 0.8677664032249509, - "grad_norm": 4.576429843902588, - "learning_rate": 3.5537226612917486e-05, - "loss": 0.5785, - "step": 98160 - }, - { - "epoch": 0.8678548064852631, - "grad_norm": 3.215961217880249, - "learning_rate": 3.5535753225245615e-05, - "loss": 0.6113, - "step": 98170 - }, - { - "epoch": 0.8679432097455754, - "grad_norm": 5.609352111816406, - "learning_rate": 3.553427983757375e-05, - "loss": 0.628, - "step": 98180 - }, - { - "epoch": 0.8680316130058876, - "grad_norm": 2.508380174636841, - "learning_rate": 3.553280644990187e-05, - "loss": 0.7088, - "step": 98190 - }, - { - "epoch": 0.8681200162662, - "grad_norm": 1.774983525276184, - "learning_rate": 3.5531333062230006e-05, - "loss": 0.6589, - "step": 98200 - }, - { - "epoch": 0.8682084195265122, - "grad_norm": 3.499372720718384, - "learning_rate": 3.552985967455813e-05, - "loss": 0.6775, - "step": 98210 - }, - { - "epoch": 0.8682968227868244, - "grad_norm": 6.5042724609375, - "learning_rate": 3.552838628688626e-05, - "loss": 0.7656, - "step": 98220 - }, - { - "epoch": 0.8683852260471366, - "grad_norm": 3.096292495727539, - "learning_rate": 3.552691289921439e-05, - "loss": 0.7369, - "step": 98230 - }, - { - "epoch": 0.8684736293074489, - "grad_norm": 3.5823757648468018, - "learning_rate": 3.552543951154252e-05, - "loss": 0.7006, - "step": 98240 - }, - { - "epoch": 0.8685620325677611, - "grad_norm": 1.5179250240325928, - "learning_rate": 3.552396612387065e-05, - "loss": 0.7063, - "step": 98250 - }, - { - "epoch": 0.8686504358280733, - "grad_norm": 3.8673157691955566, - "learning_rate": 3.552249273619878e-05, - "loss": 0.6496, - "step": 98260 - }, - { - "epoch": 0.8687388390883856, - "grad_norm": 2.3928096294403076, - "learning_rate": 3.5521019348526905e-05, - "loss": 0.7631, - "step": 98270 - }, - { - "epoch": 0.8688272423486978, - "grad_norm": 3.0162246227264404, - "learning_rate": 3.551954596085504e-05, - "loss": 0.5955, - "step": 98280 - }, - { - "epoch": 0.86891564560901, - "grad_norm": 11.222826957702637, - "learning_rate": 3.551807257318317e-05, - "loss": 0.6608, - "step": 98290 - }, - { - "epoch": 0.8690040488693223, - "grad_norm": 3.427701234817505, - "learning_rate": 3.55165991855113e-05, - "loss": 0.5962, - "step": 98300 - }, - { - "epoch": 0.8690924521296346, - "grad_norm": 4.837122917175293, - "learning_rate": 3.5515125797839425e-05, - "loss": 0.7147, - "step": 98310 - }, - { - "epoch": 0.8691808553899468, - "grad_norm": 20.0013370513916, - "learning_rate": 3.5513652410167553e-05, - "loss": 0.5389, - "step": 98320 - }, - { - "epoch": 0.8692692586502591, - "grad_norm": 1.3774549961090088, - "learning_rate": 3.551217902249568e-05, - "loss": 0.5439, - "step": 98330 - }, - { - "epoch": 0.8693576619105713, - "grad_norm": 4.33914852142334, - "learning_rate": 3.551070563482382e-05, - "loss": 0.6164, - "step": 98340 - }, - { - "epoch": 0.8694460651708835, - "grad_norm": 2.696580648422241, - "learning_rate": 3.550923224715194e-05, - "loss": 0.6114, - "step": 98350 - }, - { - "epoch": 0.8695344684311958, - "grad_norm": 1.8292914628982544, - "learning_rate": 3.5507758859480074e-05, - "loss": 0.7744, - "step": 98360 - }, - { - "epoch": 0.869622871691508, - "grad_norm": 7.250606536865234, - "learning_rate": 3.55062854718082e-05, - "loss": 0.5779, - "step": 98370 - }, - { - "epoch": 0.8697112749518202, - "grad_norm": 6.369245529174805, - "learning_rate": 3.550481208413633e-05, - "loss": 0.639, - "step": 98380 - }, - { - "epoch": 0.8697996782121324, - "grad_norm": 19.52463722229004, - "learning_rate": 3.550333869646446e-05, - "loss": 0.5768, - "step": 98390 - }, - { - "epoch": 0.8698880814724447, - "grad_norm": 5.448720455169678, - "learning_rate": 3.5501865308792594e-05, - "loss": 0.5667, - "step": 98400 - }, - { - "epoch": 0.8699764847327569, - "grad_norm": 5.967934608459473, - "learning_rate": 3.5500391921120715e-05, - "loss": 0.5591, - "step": 98410 - }, - { - "epoch": 0.8700648879930691, - "grad_norm": 3.537064790725708, - "learning_rate": 3.549891853344885e-05, - "loss": 0.6367, - "step": 98420 - }, - { - "epoch": 0.8701532912533815, - "grad_norm": 2.089097738265991, - "learning_rate": 3.549744514577697e-05, - "loss": 0.8827, - "step": 98430 - }, - { - "epoch": 0.8702416945136937, - "grad_norm": 3.187114953994751, - "learning_rate": 3.549597175810511e-05, - "loss": 0.6592, - "step": 98440 - }, - { - "epoch": 0.8703300977740059, - "grad_norm": 1.6178146600723267, - "learning_rate": 3.5494498370433236e-05, - "loss": 0.631, - "step": 98450 - }, - { - "epoch": 0.8704185010343182, - "grad_norm": 4.6605448722839355, - "learning_rate": 3.5493024982761364e-05, - "loss": 0.7185, - "step": 98460 - }, - { - "epoch": 0.8705069042946304, - "grad_norm": 1.473974347114563, - "learning_rate": 3.549155159508949e-05, - "loss": 0.6521, - "step": 98470 - }, - { - "epoch": 0.8705953075549426, - "grad_norm": 2.387791633605957, - "learning_rate": 3.549007820741763e-05, - "loss": 0.6081, - "step": 98480 - }, - { - "epoch": 0.8706837108152549, - "grad_norm": 7.768845081329346, - "learning_rate": 3.548860481974575e-05, - "loss": 0.6605, - "step": 98490 - }, - { - "epoch": 0.8707721140755671, - "grad_norm": 32.79899978637695, - "learning_rate": 3.5487131432073884e-05, - "loss": 0.5928, - "step": 98500 - }, - { - "epoch": 0.8708605173358793, - "grad_norm": 4.767167091369629, - "learning_rate": 3.548565804440201e-05, - "loss": 0.7422, - "step": 98510 - }, - { - "epoch": 0.8709489205961916, - "grad_norm": 2.4973831176757812, - "learning_rate": 3.548418465673014e-05, - "loss": 0.6895, - "step": 98520 - }, - { - "epoch": 0.8710373238565038, - "grad_norm": 3.401088237762451, - "learning_rate": 3.548271126905827e-05, - "loss": 0.6381, - "step": 98530 - }, - { - "epoch": 0.871125727116816, - "grad_norm": 0.8923147916793823, - "learning_rate": 3.5481237881386404e-05, - "loss": 0.6686, - "step": 98540 - }, - { - "epoch": 0.8712141303771284, - "grad_norm": 1.5152621269226074, - "learning_rate": 3.5479764493714526e-05, - "loss": 0.5468, - "step": 98550 - }, - { - "epoch": 0.8713025336374406, - "grad_norm": 4.025765419006348, - "learning_rate": 3.547829110604266e-05, - "loss": 0.6428, - "step": 98560 - }, - { - "epoch": 0.8713909368977528, - "grad_norm": 5.059818267822266, - "learning_rate": 3.547681771837079e-05, - "loss": 0.5825, - "step": 98570 - }, - { - "epoch": 0.871479340158065, - "grad_norm": 6.144792079925537, - "learning_rate": 3.547534433069892e-05, - "loss": 0.8703, - "step": 98580 - }, - { - "epoch": 0.8715677434183773, - "grad_norm": 2.613663673400879, - "learning_rate": 3.5473870943027046e-05, - "loss": 0.7718, - "step": 98590 - }, - { - "epoch": 0.8716561466786895, - "grad_norm": 2.0589616298675537, - "learning_rate": 3.5472397555355174e-05, - "loss": 0.6489, - "step": 98600 - }, - { - "epoch": 0.8717445499390017, - "grad_norm": 31.750431060791016, - "learning_rate": 3.54709241676833e-05, - "loss": 0.6047, - "step": 98610 - }, - { - "epoch": 0.871832953199314, - "grad_norm": 1.3656587600708008, - "learning_rate": 3.546945078001144e-05, - "loss": 0.6384, - "step": 98620 - }, - { - "epoch": 0.8719213564596262, - "grad_norm": 3.833303451538086, - "learning_rate": 3.5467977392339566e-05, - "loss": 0.508, - "step": 98630 - }, - { - "epoch": 0.8720097597199384, - "grad_norm": 0.9208725690841675, - "learning_rate": 3.5466504004667695e-05, - "loss": 0.6131, - "step": 98640 - }, - { - "epoch": 0.8720981629802507, - "grad_norm": 1.3009897470474243, - "learning_rate": 3.546503061699582e-05, - "loss": 0.6773, - "step": 98650 - }, - { - "epoch": 0.8721865662405629, - "grad_norm": 7.807600975036621, - "learning_rate": 3.546355722932395e-05, - "loss": 0.6227, - "step": 98660 - }, - { - "epoch": 0.8722749695008752, - "grad_norm": 1.3163272142410278, - "learning_rate": 3.546208384165208e-05, - "loss": 0.6966, - "step": 98670 - }, - { - "epoch": 0.8723633727611875, - "grad_norm": 3.8006227016448975, - "learning_rate": 3.546061045398021e-05, - "loss": 0.6701, - "step": 98680 - }, - { - "epoch": 0.8724517760214997, - "grad_norm": 1.7885210514068604, - "learning_rate": 3.545913706630834e-05, - "loss": 0.5752, - "step": 98690 - }, - { - "epoch": 0.8725401792818119, - "grad_norm": 1.1173492670059204, - "learning_rate": 3.545766367863647e-05, - "loss": 0.703, - "step": 98700 - }, - { - "epoch": 0.8726285825421242, - "grad_norm": 2.2168967723846436, - "learning_rate": 3.54561902909646e-05, - "loss": 0.6392, - "step": 98710 - }, - { - "epoch": 0.8727169858024364, - "grad_norm": 4.935361385345459, - "learning_rate": 3.545471690329273e-05, - "loss": 0.6871, - "step": 98720 - }, - { - "epoch": 0.8728053890627486, - "grad_norm": 12.358054161071777, - "learning_rate": 3.5453243515620857e-05, - "loss": 0.5707, - "step": 98730 - }, - { - "epoch": 0.8728937923230609, - "grad_norm": 1.3461123704910278, - "learning_rate": 3.5451770127948985e-05, - "loss": 0.5944, - "step": 98740 - }, - { - "epoch": 0.8729821955833731, - "grad_norm": 2.271646738052368, - "learning_rate": 3.545029674027712e-05, - "loss": 0.5627, - "step": 98750 - }, - { - "epoch": 0.8730705988436853, - "grad_norm": 2.8260445594787598, - "learning_rate": 3.544882335260525e-05, - "loss": 0.6882, - "step": 98760 - }, - { - "epoch": 0.8731590021039976, - "grad_norm": 5.59370756149292, - "learning_rate": 3.544734996493338e-05, - "loss": 0.7472, - "step": 98770 - }, - { - "epoch": 0.8732474053643098, - "grad_norm": 4.746335983276367, - "learning_rate": 3.5445876577261505e-05, - "loss": 0.6872, - "step": 98780 - }, - { - "epoch": 0.8733358086246221, - "grad_norm": 5.156048774719238, - "learning_rate": 3.5444403189589633e-05, - "loss": 0.6404, - "step": 98790 - }, - { - "epoch": 0.8734242118849344, - "grad_norm": 2.1542906761169434, - "learning_rate": 3.544292980191776e-05, - "loss": 0.5713, - "step": 98800 - }, - { - "epoch": 0.8735126151452466, - "grad_norm": 3.502044439315796, - "learning_rate": 3.54414564142459e-05, - "loss": 0.6576, - "step": 98810 - }, - { - "epoch": 0.8736010184055588, - "grad_norm": 5.644341468811035, - "learning_rate": 3.543998302657402e-05, - "loss": 0.6506, - "step": 98820 - }, - { - "epoch": 0.873689421665871, - "grad_norm": 8.779607772827148, - "learning_rate": 3.5438509638902154e-05, - "loss": 0.6885, - "step": 98830 - }, - { - "epoch": 0.8737778249261833, - "grad_norm": 3.6800928115844727, - "learning_rate": 3.543703625123028e-05, - "loss": 0.6447, - "step": 98840 - }, - { - "epoch": 0.8738662281864955, - "grad_norm": 1.630843997001648, - "learning_rate": 3.543556286355841e-05, - "loss": 0.5899, - "step": 98850 - }, - { - "epoch": 0.8739546314468077, - "grad_norm": 1.7254748344421387, - "learning_rate": 3.543408947588654e-05, - "loss": 0.6351, - "step": 98860 - }, - { - "epoch": 0.87404303470712, - "grad_norm": 2.0491297245025635, - "learning_rate": 3.5432616088214674e-05, - "loss": 0.628, - "step": 98870 - }, - { - "epoch": 0.8741314379674322, - "grad_norm": 5.019067287445068, - "learning_rate": 3.5431142700542795e-05, - "loss": 0.6937, - "step": 98880 - }, - { - "epoch": 0.8742198412277444, - "grad_norm": 9.280817985534668, - "learning_rate": 3.542966931287093e-05, - "loss": 0.595, - "step": 98890 - }, - { - "epoch": 0.8743082444880568, - "grad_norm": 3.0402538776397705, - "learning_rate": 3.542819592519905e-05, - "loss": 0.6988, - "step": 98900 - }, - { - "epoch": 0.874396647748369, - "grad_norm": 4.510800361633301, - "learning_rate": 3.542672253752719e-05, - "loss": 0.6386, - "step": 98910 - }, - { - "epoch": 0.8744850510086812, - "grad_norm": 5.1762847900390625, - "learning_rate": 3.5425249149855316e-05, - "loss": 0.6576, - "step": 98920 - }, - { - "epoch": 0.8745734542689935, - "grad_norm": 2.7324318885803223, - "learning_rate": 3.5423775762183444e-05, - "loss": 0.7281, - "step": 98930 - }, - { - "epoch": 0.8746618575293057, - "grad_norm": 2.619001865386963, - "learning_rate": 3.542230237451157e-05, - "loss": 0.7271, - "step": 98940 - }, - { - "epoch": 0.8747502607896179, - "grad_norm": 2.3867146968841553, - "learning_rate": 3.542082898683971e-05, - "loss": 0.6574, - "step": 98950 - }, - { - "epoch": 0.8748386640499302, - "grad_norm": 4.602837562561035, - "learning_rate": 3.541935559916783e-05, - "loss": 0.5304, - "step": 98960 - }, - { - "epoch": 0.8749270673102424, - "grad_norm": 3.5285396575927734, - "learning_rate": 3.5417882211495964e-05, - "loss": 0.7435, - "step": 98970 - }, - { - "epoch": 0.8750154705705546, - "grad_norm": 2.4819743633270264, - "learning_rate": 3.541640882382409e-05, - "loss": 0.6965, - "step": 98980 - }, - { - "epoch": 0.8751038738308669, - "grad_norm": 2.2841880321502686, - "learning_rate": 3.541493543615222e-05, - "loss": 0.6337, - "step": 98990 - }, - { - "epoch": 0.8751922770911791, - "grad_norm": 6.790390968322754, - "learning_rate": 3.541346204848035e-05, - "loss": 0.5454, - "step": 99000 - }, - { - "epoch": 0.8752806803514913, - "grad_norm": 1.1564199924468994, - "learning_rate": 3.5411988660808484e-05, - "loss": 0.634, - "step": 99010 - }, - { - "epoch": 0.8753690836118037, - "grad_norm": 10.727293014526367, - "learning_rate": 3.5410515273136606e-05, - "loss": 0.6872, - "step": 99020 - }, - { - "epoch": 0.8754574868721159, - "grad_norm": 6.787434101104736, - "learning_rate": 3.540904188546474e-05, - "loss": 0.6958, - "step": 99030 - }, - { - "epoch": 0.8755458901324281, - "grad_norm": 8.093029022216797, - "learning_rate": 3.540756849779286e-05, - "loss": 0.6648, - "step": 99040 - }, - { - "epoch": 0.8756342933927403, - "grad_norm": 2.7870447635650635, - "learning_rate": 3.5406095110121e-05, - "loss": 0.5873, - "step": 99050 - }, - { - "epoch": 0.8757226966530526, - "grad_norm": 2.5037841796875, - "learning_rate": 3.5404621722449126e-05, - "loss": 0.6493, - "step": 99060 - }, - { - "epoch": 0.8758110999133648, - "grad_norm": 1.922541618347168, - "learning_rate": 3.5403148334777254e-05, - "loss": 0.5845, - "step": 99070 - }, - { - "epoch": 0.875899503173677, - "grad_norm": 3.1529664993286133, - "learning_rate": 3.540167494710538e-05, - "loss": 0.4853, - "step": 99080 - }, - { - "epoch": 0.8759879064339893, - "grad_norm": 1.3723474740982056, - "learning_rate": 3.540020155943352e-05, - "loss": 0.6435, - "step": 99090 - }, - { - "epoch": 0.8760763096943015, - "grad_norm": 4.011801719665527, - "learning_rate": 3.539872817176164e-05, - "loss": 0.6664, - "step": 99100 - }, - { - "epoch": 0.8761647129546137, - "grad_norm": 3.2729763984680176, - "learning_rate": 3.5397254784089775e-05, - "loss": 0.7014, - "step": 99110 - }, - { - "epoch": 0.876253116214926, - "grad_norm": 4.137759208679199, - "learning_rate": 3.53957813964179e-05, - "loss": 0.6752, - "step": 99120 - }, - { - "epoch": 0.8763415194752382, - "grad_norm": 3.8527872562408447, - "learning_rate": 3.539430800874603e-05, - "loss": 0.7016, - "step": 99130 - }, - { - "epoch": 0.8764299227355505, - "grad_norm": 3.8789100646972656, - "learning_rate": 3.539283462107416e-05, - "loss": 0.6849, - "step": 99140 - }, - { - "epoch": 0.8765183259958628, - "grad_norm": 5.719064235687256, - "learning_rate": 3.539136123340229e-05, - "loss": 0.7762, - "step": 99150 - }, - { - "epoch": 0.876606729256175, - "grad_norm": 1.9379489421844482, - "learning_rate": 3.5389887845730416e-05, - "loss": 0.7892, - "step": 99160 - }, - { - "epoch": 0.8766951325164872, - "grad_norm": 2.9722301959991455, - "learning_rate": 3.538841445805855e-05, - "loss": 0.7086, - "step": 99170 - }, - { - "epoch": 0.8767835357767995, - "grad_norm": 16.889270782470703, - "learning_rate": 3.538694107038667e-05, - "loss": 0.6332, - "step": 99180 - }, - { - "epoch": 0.8768719390371117, - "grad_norm": 2.966301202774048, - "learning_rate": 3.538546768271481e-05, - "loss": 0.6073, - "step": 99190 - }, - { - "epoch": 0.8769603422974239, - "grad_norm": 3.1727070808410645, - "learning_rate": 3.538399429504294e-05, - "loss": 0.6174, - "step": 99200 - }, - { - "epoch": 0.8770487455577362, - "grad_norm": 5.121644496917725, - "learning_rate": 3.5382520907371065e-05, - "loss": 0.6423, - "step": 99210 - }, - { - "epoch": 0.8771371488180484, - "grad_norm": 9.327658653259277, - "learning_rate": 3.538104751969919e-05, - "loss": 0.6135, - "step": 99220 - }, - { - "epoch": 0.8772255520783606, - "grad_norm": 1.8207581043243408, - "learning_rate": 3.537957413202733e-05, - "loss": 0.7655, - "step": 99230 - }, - { - "epoch": 0.8773139553386728, - "grad_norm": 5.972422122955322, - "learning_rate": 3.537810074435545e-05, - "loss": 0.7373, - "step": 99240 - }, - { - "epoch": 0.8774023585989851, - "grad_norm": 3.4334843158721924, - "learning_rate": 3.5376627356683585e-05, - "loss": 0.7113, - "step": 99250 - }, - { - "epoch": 0.8774907618592974, - "grad_norm": 21.4027099609375, - "learning_rate": 3.537515396901171e-05, - "loss": 0.6069, - "step": 99260 - }, - { - "epoch": 0.8775791651196096, - "grad_norm": 1.0649467706680298, - "learning_rate": 3.537368058133984e-05, - "loss": 0.6434, - "step": 99270 - }, - { - "epoch": 0.8776675683799219, - "grad_norm": 1.082999348640442, - "learning_rate": 3.537220719366797e-05, - "loss": 0.5167, - "step": 99280 - }, - { - "epoch": 0.8777559716402341, - "grad_norm": 2.8941519260406494, - "learning_rate": 3.53707338059961e-05, - "loss": 0.6574, - "step": 99290 - }, - { - "epoch": 0.8778443749005463, - "grad_norm": 12.018267631530762, - "learning_rate": 3.536926041832423e-05, - "loss": 0.7463, - "step": 99300 - }, - { - "epoch": 0.8779327781608586, - "grad_norm": 1.1627510786056519, - "learning_rate": 3.536778703065236e-05, - "loss": 0.4938, - "step": 99310 - }, - { - "epoch": 0.8780211814211708, - "grad_norm": 1.0448715686798096, - "learning_rate": 3.5366313642980484e-05, - "loss": 0.6823, - "step": 99320 - }, - { - "epoch": 0.878109584681483, - "grad_norm": 0.8962799310684204, - "learning_rate": 3.536484025530862e-05, - "loss": 0.5619, - "step": 99330 - }, - { - "epoch": 0.8781979879417953, - "grad_norm": 5.345099925994873, - "learning_rate": 3.536336686763675e-05, - "loss": 0.6446, - "step": 99340 - }, - { - "epoch": 0.8782863912021075, - "grad_norm": 1.104266881942749, - "learning_rate": 3.5361893479964875e-05, - "loss": 0.6198, - "step": 99350 - }, - { - "epoch": 0.8783747944624197, - "grad_norm": 7.238385200500488, - "learning_rate": 3.5360420092293004e-05, - "loss": 0.7927, - "step": 99360 - }, - { - "epoch": 0.8784631977227321, - "grad_norm": 2.3845322132110596, - "learning_rate": 3.535894670462113e-05, - "loss": 0.5904, - "step": 99370 - }, - { - "epoch": 0.8785516009830443, - "grad_norm": 5.77187442779541, - "learning_rate": 3.535747331694926e-05, - "loss": 0.6724, - "step": 99380 - }, - { - "epoch": 0.8786400042433565, - "grad_norm": 6.896483898162842, - "learning_rate": 3.5355999929277396e-05, - "loss": 0.6764, - "step": 99390 - }, - { - "epoch": 0.8787284075036688, - "grad_norm": 3.7183151245117188, - "learning_rate": 3.535452654160552e-05, - "loss": 0.6929, - "step": 99400 - }, - { - "epoch": 0.878816810763981, - "grad_norm": 1.867093563079834, - "learning_rate": 3.535305315393365e-05, - "loss": 0.6756, - "step": 99410 - }, - { - "epoch": 0.8789052140242932, - "grad_norm": 11.129083633422852, - "learning_rate": 3.535157976626178e-05, - "loss": 0.6851, - "step": 99420 - }, - { - "epoch": 0.8789936172846055, - "grad_norm": 2.918914556503296, - "learning_rate": 3.535010637858991e-05, - "loss": 0.7333, - "step": 99430 - }, - { - "epoch": 0.8790820205449177, - "grad_norm": 2.4519002437591553, - "learning_rate": 3.534863299091804e-05, - "loss": 0.5644, - "step": 99440 - }, - { - "epoch": 0.8791704238052299, - "grad_norm": 1.9421651363372803, - "learning_rate": 3.534715960324617e-05, - "loss": 0.7968, - "step": 99450 - }, - { - "epoch": 0.8792588270655421, - "grad_norm": 2.8746142387390137, - "learning_rate": 3.5345686215574294e-05, - "loss": 0.5139, - "step": 99460 - }, - { - "epoch": 0.8793472303258544, - "grad_norm": 3.651703119277954, - "learning_rate": 3.534421282790243e-05, - "loss": 0.6729, - "step": 99470 - }, - { - "epoch": 0.8794356335861666, - "grad_norm": 7.4472503662109375, - "learning_rate": 3.534273944023056e-05, - "loss": 0.5204, - "step": 99480 - }, - { - "epoch": 0.879524036846479, - "grad_norm": 3.8146281242370605, - "learning_rate": 3.5341266052558686e-05, - "loss": 0.706, - "step": 99490 - }, - { - "epoch": 0.8796124401067912, - "grad_norm": 6.142029285430908, - "learning_rate": 3.5339792664886814e-05, - "loss": 0.6076, - "step": 99500 - }, - { - "epoch": 0.8797008433671034, - "grad_norm": 2.7834155559539795, - "learning_rate": 3.533831927721494e-05, - "loss": 0.5998, - "step": 99510 - }, - { - "epoch": 0.8797892466274156, - "grad_norm": 4.0486836433410645, - "learning_rate": 3.533684588954307e-05, - "loss": 0.6007, - "step": 99520 - }, - { - "epoch": 0.8798776498877279, - "grad_norm": 2.0358059406280518, - "learning_rate": 3.5335372501871206e-05, - "loss": 0.5999, - "step": 99530 - }, - { - "epoch": 0.8799660531480401, - "grad_norm": 8.948349952697754, - "learning_rate": 3.5333899114199335e-05, - "loss": 0.8091, - "step": 99540 - }, - { - "epoch": 0.8800544564083523, - "grad_norm": 5.502082824707031, - "learning_rate": 3.533242572652746e-05, - "loss": 0.6152, - "step": 99550 - }, - { - "epoch": 0.8801428596686646, - "grad_norm": 1.0880204439163208, - "learning_rate": 3.533095233885559e-05, - "loss": 0.6501, - "step": 99560 - }, - { - "epoch": 0.8802312629289768, - "grad_norm": 7.57011604309082, - "learning_rate": 3.532947895118372e-05, - "loss": 0.7725, - "step": 99570 - }, - { - "epoch": 0.880319666189289, - "grad_norm": 2.140597105026245, - "learning_rate": 3.532800556351185e-05, - "loss": 0.7059, - "step": 99580 - }, - { - "epoch": 0.8804080694496013, - "grad_norm": 1.4484633207321167, - "learning_rate": 3.532653217583998e-05, - "loss": 0.6203, - "step": 99590 - }, - { - "epoch": 0.8804964727099135, - "grad_norm": 1.4211913347244263, - "learning_rate": 3.532505878816811e-05, - "loss": 0.5407, - "step": 99600 - }, - { - "epoch": 0.8805848759702258, - "grad_norm": 1.9758319854736328, - "learning_rate": 3.532358540049624e-05, - "loss": 0.6465, - "step": 99610 - }, - { - "epoch": 0.8806732792305381, - "grad_norm": 3.216864824295044, - "learning_rate": 3.532211201282437e-05, - "loss": 0.6105, - "step": 99620 - }, - { - "epoch": 0.8807616824908503, - "grad_norm": 34.49270248413086, - "learning_rate": 3.5320638625152497e-05, - "loss": 0.6752, - "step": 99630 - }, - { - "epoch": 0.8808500857511625, - "grad_norm": 1.2794069051742554, - "learning_rate": 3.5319165237480625e-05, - "loss": 0.6764, - "step": 99640 - }, - { - "epoch": 0.8809384890114748, - "grad_norm": 1.846698522567749, - "learning_rate": 3.531769184980875e-05, - "loss": 0.5829, - "step": 99650 - }, - { - "epoch": 0.881026892271787, - "grad_norm": 2.458574056625366, - "learning_rate": 3.531621846213689e-05, - "loss": 0.5706, - "step": 99660 - }, - { - "epoch": 0.8811152955320992, - "grad_norm": 3.632127046585083, - "learning_rate": 3.531474507446502e-05, - "loss": 0.8337, - "step": 99670 - }, - { - "epoch": 0.8812036987924114, - "grad_norm": 2.750758647918701, - "learning_rate": 3.5313271686793145e-05, - "loss": 0.553, - "step": 99680 - }, - { - "epoch": 0.8812921020527237, - "grad_norm": 3.2621114253997803, - "learning_rate": 3.5311798299121273e-05, - "loss": 0.6514, - "step": 99690 - }, - { - "epoch": 0.8813805053130359, - "grad_norm": 1.0674934387207031, - "learning_rate": 3.53103249114494e-05, - "loss": 0.5775, - "step": 99700 - }, - { - "epoch": 0.8814689085733481, - "grad_norm": 2.0522475242614746, - "learning_rate": 3.530885152377753e-05, - "loss": 0.603, - "step": 99710 - }, - { - "epoch": 0.8815573118336604, - "grad_norm": 1.4563353061676025, - "learning_rate": 3.5307378136105665e-05, - "loss": 0.7596, - "step": 99720 - }, - { - "epoch": 0.8816457150939727, - "grad_norm": 8.15146255493164, - "learning_rate": 3.530590474843379e-05, - "loss": 0.6854, - "step": 99730 - }, - { - "epoch": 0.881734118354285, - "grad_norm": 5.4734392166137695, - "learning_rate": 3.530443136076192e-05, - "loss": 0.5172, - "step": 99740 - }, - { - "epoch": 0.8818225216145972, - "grad_norm": 2.399756669998169, - "learning_rate": 3.530295797309005e-05, - "loss": 0.7575, - "step": 99750 - }, - { - "epoch": 0.8819109248749094, - "grad_norm": 2.3738224506378174, - "learning_rate": 3.530148458541818e-05, - "loss": 0.7188, - "step": 99760 - }, - { - "epoch": 0.8819993281352216, - "grad_norm": 2.2894015312194824, - "learning_rate": 3.530001119774631e-05, - "loss": 0.8088, - "step": 99770 - }, - { - "epoch": 0.8820877313955339, - "grad_norm": 1.5450955629348755, - "learning_rate": 3.529853781007444e-05, - "loss": 0.5708, - "step": 99780 - }, - { - "epoch": 0.8821761346558461, - "grad_norm": 1.1488293409347534, - "learning_rate": 3.5297064422402564e-05, - "loss": 0.6266, - "step": 99790 - }, - { - "epoch": 0.8822645379161583, - "grad_norm": 2.267016649246216, - "learning_rate": 3.52955910347307e-05, - "loss": 0.6417, - "step": 99800 - }, - { - "epoch": 0.8823529411764706, - "grad_norm": 2.547762155532837, - "learning_rate": 3.529411764705883e-05, - "loss": 0.7029, - "step": 99810 - }, - { - "epoch": 0.8824413444367828, - "grad_norm": 2.7036497592926025, - "learning_rate": 3.5292644259386956e-05, - "loss": 0.788, - "step": 99820 - }, - { - "epoch": 0.882529747697095, - "grad_norm": 2.031101942062378, - "learning_rate": 3.5291170871715084e-05, - "loss": 0.6671, - "step": 99830 - }, - { - "epoch": 0.8826181509574073, - "grad_norm": 2.2533857822418213, - "learning_rate": 3.528969748404322e-05, - "loss": 0.6051, - "step": 99840 - }, - { - "epoch": 0.8827065542177196, - "grad_norm": 5.356338977813721, - "learning_rate": 3.528822409637134e-05, - "loss": 0.7115, - "step": 99850 - }, - { - "epoch": 0.8827949574780318, - "grad_norm": 2.7008416652679443, - "learning_rate": 3.5286750708699476e-05, - "loss": 0.5552, - "step": 99860 - }, - { - "epoch": 0.8828833607383441, - "grad_norm": 2.2722997665405273, - "learning_rate": 3.52852773210276e-05, - "loss": 0.551, - "step": 99870 - }, - { - "epoch": 0.8829717639986563, - "grad_norm": 7.175367832183838, - "learning_rate": 3.528380393335573e-05, - "loss": 0.8399, - "step": 99880 - }, - { - "epoch": 0.8830601672589685, - "grad_norm": 3.9263012409210205, - "learning_rate": 3.528233054568386e-05, - "loss": 0.6357, - "step": 99890 - }, - { - "epoch": 0.8831485705192808, - "grad_norm": 0.8555113077163696, - "learning_rate": 3.528085715801199e-05, - "loss": 0.5515, - "step": 99900 - }, - { - "epoch": 0.883236973779593, - "grad_norm": 1.8814798593521118, - "learning_rate": 3.527938377034012e-05, - "loss": 0.5942, - "step": 99910 - }, - { - "epoch": 0.8833253770399052, - "grad_norm": 2.6422667503356934, - "learning_rate": 3.527791038266825e-05, - "loss": 0.6715, - "step": 99920 - }, - { - "epoch": 0.8834137803002174, - "grad_norm": 4.119536399841309, - "learning_rate": 3.5276436994996374e-05, - "loss": 0.7084, - "step": 99930 - }, - { - "epoch": 0.8835021835605297, - "grad_norm": 6.071832656860352, - "learning_rate": 3.527496360732451e-05, - "loss": 0.7154, - "step": 99940 - }, - { - "epoch": 0.8835905868208419, - "grad_norm": 2.373159408569336, - "learning_rate": 3.527349021965264e-05, - "loss": 0.7028, - "step": 99950 - }, - { - "epoch": 0.8836789900811542, - "grad_norm": 8.160861015319824, - "learning_rate": 3.5272016831980766e-05, - "loss": 0.7688, - "step": 99960 - }, - { - "epoch": 0.8837673933414665, - "grad_norm": 1.8556309938430786, - "learning_rate": 3.5270543444308894e-05, - "loss": 0.5306, - "step": 99970 - }, - { - "epoch": 0.8838557966017787, - "grad_norm": 1.4118374586105347, - "learning_rate": 3.526907005663702e-05, - "loss": 0.5776, - "step": 99980 - }, - { - "epoch": 0.8839441998620909, - "grad_norm": 4.978692531585693, - "learning_rate": 3.526759666896515e-05, - "loss": 0.671, - "step": 99990 - }, - { - "epoch": 0.8840326031224032, - "grad_norm": 2.377770185470581, - "learning_rate": 3.5266123281293286e-05, - "loss": 0.729, - "step": 100000 - }, - { - "epoch": 0.8841210063827154, - "grad_norm": 1.3143846988677979, - "learning_rate": 3.526464989362141e-05, - "loss": 0.6601, - "step": 100010 - }, - { - "epoch": 0.8842094096430276, - "grad_norm": 2.8539278507232666, - "learning_rate": 3.526317650594954e-05, - "loss": 0.5819, - "step": 100020 - }, - { - "epoch": 0.8842978129033399, - "grad_norm": 1.5120114088058472, - "learning_rate": 3.526170311827767e-05, - "loss": 0.6371, - "step": 100030 - }, - { - "epoch": 0.8843862161636521, - "grad_norm": 3.649439573287964, - "learning_rate": 3.52602297306058e-05, - "loss": 0.7258, - "step": 100040 - }, - { - "epoch": 0.8844746194239643, - "grad_norm": 3.1046438217163086, - "learning_rate": 3.525875634293393e-05, - "loss": 0.7519, - "step": 100050 - }, - { - "epoch": 0.8845630226842766, - "grad_norm": 3.6604366302490234, - "learning_rate": 3.525728295526206e-05, - "loss": 0.6225, - "step": 100060 - }, - { - "epoch": 0.8846514259445888, - "grad_norm": 3.448822021484375, - "learning_rate": 3.5255809567590185e-05, - "loss": 0.7538, - "step": 100070 - }, - { - "epoch": 0.8847398292049011, - "grad_norm": 2.1168973445892334, - "learning_rate": 3.525433617991832e-05, - "loss": 0.6728, - "step": 100080 - }, - { - "epoch": 0.8848282324652134, - "grad_norm": 10.837515830993652, - "learning_rate": 3.525286279224644e-05, - "loss": 0.579, - "step": 100090 - }, - { - "epoch": 0.8849166357255256, - "grad_norm": 5.5818586349487305, - "learning_rate": 3.5251389404574577e-05, - "loss": 0.5947, - "step": 100100 - }, - { - "epoch": 0.8850050389858378, - "grad_norm": 1.1673955917358398, - "learning_rate": 3.5249916016902705e-05, - "loss": 0.6881, - "step": 100110 - }, - { - "epoch": 0.88509344224615, - "grad_norm": 1.301098346710205, - "learning_rate": 3.524844262923083e-05, - "loss": 0.6105, - "step": 100120 - }, - { - "epoch": 0.8851818455064623, - "grad_norm": 4.43489933013916, - "learning_rate": 3.524696924155896e-05, - "loss": 0.6806, - "step": 100130 - }, - { - "epoch": 0.8852702487667745, - "grad_norm": 1.8990715742111206, - "learning_rate": 3.52454958538871e-05, - "loss": 0.6743, - "step": 100140 - }, - { - "epoch": 0.8853586520270867, - "grad_norm": 3.0658020973205566, - "learning_rate": 3.524402246621522e-05, - "loss": 0.6715, - "step": 100150 - }, - { - "epoch": 0.885447055287399, - "grad_norm": 1.5086337327957153, - "learning_rate": 3.5242549078543353e-05, - "loss": 0.7008, - "step": 100160 - }, - { - "epoch": 0.8855354585477112, - "grad_norm": 1.8033921718597412, - "learning_rate": 3.524107569087148e-05, - "loss": 0.7926, - "step": 100170 - }, - { - "epoch": 0.8856238618080234, - "grad_norm": 3.2836856842041016, - "learning_rate": 3.523960230319961e-05, - "loss": 0.7363, - "step": 100180 - }, - { - "epoch": 0.8857122650683357, - "grad_norm": 1.9718445539474487, - "learning_rate": 3.523812891552774e-05, - "loss": 0.6143, - "step": 100190 - }, - { - "epoch": 0.885800668328648, - "grad_norm": 4.54406213760376, - "learning_rate": 3.523665552785587e-05, - "loss": 0.6392, - "step": 100200 - }, - { - "epoch": 0.8858890715889602, - "grad_norm": 5.802214622497559, - "learning_rate": 3.5235182140183995e-05, - "loss": 0.6594, - "step": 100210 - }, - { - "epoch": 0.8859774748492725, - "grad_norm": 1.9690452814102173, - "learning_rate": 3.523370875251213e-05, - "loss": 0.5638, - "step": 100220 - }, - { - "epoch": 0.8860658781095847, - "grad_norm": 2.6623852252960205, - "learning_rate": 3.523223536484025e-05, - "loss": 0.7114, - "step": 100230 - }, - { - "epoch": 0.8861542813698969, - "grad_norm": 4.33412504196167, - "learning_rate": 3.523076197716839e-05, - "loss": 0.6726, - "step": 100240 - }, - { - "epoch": 0.8862426846302092, - "grad_norm": 3.843928098678589, - "learning_rate": 3.5229288589496515e-05, - "loss": 0.637, - "step": 100250 - }, - { - "epoch": 0.8863310878905214, - "grad_norm": 4.868276596069336, - "learning_rate": 3.5227815201824644e-05, - "loss": 0.7108, - "step": 100260 - }, - { - "epoch": 0.8864194911508336, - "grad_norm": 1.452610731124878, - "learning_rate": 3.522634181415277e-05, - "loss": 0.6581, - "step": 100270 - }, - { - "epoch": 0.8865078944111459, - "grad_norm": 1.7720494270324707, - "learning_rate": 3.522486842648091e-05, - "loss": 0.6808, - "step": 100280 - }, - { - "epoch": 0.8865962976714581, - "grad_norm": 9.473158836364746, - "learning_rate": 3.522339503880903e-05, - "loss": 0.7835, - "step": 100290 - }, - { - "epoch": 0.8866847009317703, - "grad_norm": 1.4104455709457397, - "learning_rate": 3.5221921651137164e-05, - "loss": 0.6491, - "step": 100300 - }, - { - "epoch": 0.8867731041920826, - "grad_norm": 8.778430938720703, - "learning_rate": 3.522044826346529e-05, - "loss": 0.6525, - "step": 100310 - }, - { - "epoch": 0.8868615074523949, - "grad_norm": 1.6890722513198853, - "learning_rate": 3.521897487579342e-05, - "loss": 0.6736, - "step": 100320 - }, - { - "epoch": 0.8869499107127071, - "grad_norm": 11.171053886413574, - "learning_rate": 3.521750148812155e-05, - "loss": 0.5095, - "step": 100330 - }, - { - "epoch": 0.8870383139730194, - "grad_norm": 2.19525146484375, - "learning_rate": 3.521602810044968e-05, - "loss": 0.7259, - "step": 100340 - }, - { - "epoch": 0.8871267172333316, - "grad_norm": 3.0850815773010254, - "learning_rate": 3.5214554712777806e-05, - "loss": 0.6318, - "step": 100350 - }, - { - "epoch": 0.8872151204936438, - "grad_norm": 7.177029609680176, - "learning_rate": 3.521308132510594e-05, - "loss": 0.8074, - "step": 100360 - }, - { - "epoch": 0.887303523753956, - "grad_norm": 4.595524311065674, - "learning_rate": 3.521160793743406e-05, - "loss": 0.6359, - "step": 100370 - }, - { - "epoch": 0.8873919270142683, - "grad_norm": 7.67150354385376, - "learning_rate": 3.52101345497622e-05, - "loss": 0.64, - "step": 100380 - }, - { - "epoch": 0.8874803302745805, - "grad_norm": 1.5217820405960083, - "learning_rate": 3.5208661162090326e-05, - "loss": 0.6981, - "step": 100390 - }, - { - "epoch": 0.8875687335348927, - "grad_norm": 3.5454249382019043, - "learning_rate": 3.5207187774418454e-05, - "loss": 0.6135, - "step": 100400 - }, - { - "epoch": 0.887657136795205, - "grad_norm": 5.299508571624756, - "learning_rate": 3.520571438674658e-05, - "loss": 0.7602, - "step": 100410 - }, - { - "epoch": 0.8877455400555172, - "grad_norm": 3.249697685241699, - "learning_rate": 3.520424099907472e-05, - "loss": 0.7282, - "step": 100420 - }, - { - "epoch": 0.8878339433158294, - "grad_norm": 5.020130634307861, - "learning_rate": 3.520276761140284e-05, - "loss": 0.6608, - "step": 100430 - }, - { - "epoch": 0.8879223465761418, - "grad_norm": 2.3925464153289795, - "learning_rate": 3.5201294223730974e-05, - "loss": 0.5725, - "step": 100440 - }, - { - "epoch": 0.888010749836454, - "grad_norm": 15.526290893554688, - "learning_rate": 3.51998208360591e-05, - "loss": 0.6876, - "step": 100450 - }, - { - "epoch": 0.8880991530967662, - "grad_norm": 4.03241491317749, - "learning_rate": 3.519834744838723e-05, - "loss": 0.6501, - "step": 100460 - }, - { - "epoch": 0.8881875563570785, - "grad_norm": 1.3994730710983276, - "learning_rate": 3.519687406071536e-05, - "loss": 0.7434, - "step": 100470 - }, - { - "epoch": 0.8882759596173907, - "grad_norm": 1.7547762393951416, - "learning_rate": 3.519540067304349e-05, - "loss": 0.6682, - "step": 100480 - }, - { - "epoch": 0.8883643628777029, - "grad_norm": 1.701341152191162, - "learning_rate": 3.5193927285371616e-05, - "loss": 0.6908, - "step": 100490 - }, - { - "epoch": 0.8884527661380152, - "grad_norm": 3.0670440196990967, - "learning_rate": 3.519245389769975e-05, - "loss": 0.705, - "step": 100500 - }, - { - "epoch": 0.8885411693983274, - "grad_norm": 0.8181890845298767, - "learning_rate": 3.519098051002788e-05, - "loss": 0.6324, - "step": 100510 - }, - { - "epoch": 0.8886295726586396, - "grad_norm": 3.015061616897583, - "learning_rate": 3.518950712235601e-05, - "loss": 0.6946, - "step": 100520 - }, - { - "epoch": 0.8887179759189519, - "grad_norm": 6.234442710876465, - "learning_rate": 3.5188033734684136e-05, - "loss": 0.5297, - "step": 100530 - }, - { - "epoch": 0.8888063791792641, - "grad_norm": 1.1581306457519531, - "learning_rate": 3.5186560347012265e-05, - "loss": 0.6742, - "step": 100540 - }, - { - "epoch": 0.8888947824395764, - "grad_norm": 1.9346543550491333, - "learning_rate": 3.518508695934039e-05, - "loss": 0.7385, - "step": 100550 - }, - { - "epoch": 0.8889831856998887, - "grad_norm": 4.378337383270264, - "learning_rate": 3.518361357166852e-05, - "loss": 0.6182, - "step": 100560 - }, - { - "epoch": 0.8890715889602009, - "grad_norm": 3.4411778450012207, - "learning_rate": 3.518214018399666e-05, - "loss": 0.659, - "step": 100570 - }, - { - "epoch": 0.8891599922205131, - "grad_norm": 2.7635605335235596, - "learning_rate": 3.5180666796324785e-05, - "loss": 0.7243, - "step": 100580 - }, - { - "epoch": 0.8892483954808253, - "grad_norm": 5.1399383544921875, - "learning_rate": 3.517919340865291e-05, - "loss": 0.6066, - "step": 100590 - }, - { - "epoch": 0.8893367987411376, - "grad_norm": 1.4909334182739258, - "learning_rate": 3.517772002098104e-05, - "loss": 0.6279, - "step": 100600 - }, - { - "epoch": 0.8894252020014498, - "grad_norm": 4.729198455810547, - "learning_rate": 3.517624663330917e-05, - "loss": 0.722, - "step": 100610 - }, - { - "epoch": 0.889513605261762, - "grad_norm": 5.377867698669434, - "learning_rate": 3.51747732456373e-05, - "loss": 0.5887, - "step": 100620 - }, - { - "epoch": 0.8896020085220743, - "grad_norm": 1.6192539930343628, - "learning_rate": 3.5173299857965434e-05, - "loss": 0.6734, - "step": 100630 - }, - { - "epoch": 0.8896904117823865, - "grad_norm": 3.3911421298980713, - "learning_rate": 3.517182647029356e-05, - "loss": 0.707, - "step": 100640 - }, - { - "epoch": 0.8897788150426987, - "grad_norm": 1.5079931020736694, - "learning_rate": 3.517035308262169e-05, - "loss": 0.5818, - "step": 100650 - }, - { - "epoch": 0.889867218303011, - "grad_norm": 4.550739765167236, - "learning_rate": 3.516887969494982e-05, - "loss": 0.5759, - "step": 100660 - }, - { - "epoch": 0.8899556215633233, - "grad_norm": 4.818974494934082, - "learning_rate": 3.516740630727795e-05, - "loss": 0.6299, - "step": 100670 - }, - { - "epoch": 0.8900440248236355, - "grad_norm": 8.771803855895996, - "learning_rate": 3.5165932919606075e-05, - "loss": 0.6815, - "step": 100680 - }, - { - "epoch": 0.8901324280839478, - "grad_norm": 1.6297937631607056, - "learning_rate": 3.516445953193421e-05, - "loss": 0.6405, - "step": 100690 - }, - { - "epoch": 0.89022083134426, - "grad_norm": 1.503909707069397, - "learning_rate": 3.516298614426233e-05, - "loss": 0.5347, - "step": 100700 - }, - { - "epoch": 0.8903092346045722, - "grad_norm": 4.525701999664307, - "learning_rate": 3.516151275659047e-05, - "loss": 0.6617, - "step": 100710 - }, - { - "epoch": 0.8903976378648845, - "grad_norm": 1.8797188997268677, - "learning_rate": 3.5160039368918596e-05, - "loss": 0.597, - "step": 100720 - }, - { - "epoch": 0.8904860411251967, - "grad_norm": 3.4912447929382324, - "learning_rate": 3.5158565981246724e-05, - "loss": 0.6898, - "step": 100730 - }, - { - "epoch": 0.8905744443855089, - "grad_norm": 6.8011956214904785, - "learning_rate": 3.515709259357485e-05, - "loss": 0.6806, - "step": 100740 - }, - { - "epoch": 0.8906628476458212, - "grad_norm": 10.962492942810059, - "learning_rate": 3.515561920590299e-05, - "loss": 0.6588, - "step": 100750 - }, - { - "epoch": 0.8907512509061334, - "grad_norm": 2.307803153991699, - "learning_rate": 3.515414581823111e-05, - "loss": 0.6848, - "step": 100760 - }, - { - "epoch": 0.8908396541664456, - "grad_norm": 1.471994400024414, - "learning_rate": 3.5152672430559244e-05, - "loss": 0.6145, - "step": 100770 - }, - { - "epoch": 0.8909280574267578, - "grad_norm": 1.0318900346755981, - "learning_rate": 3.515119904288737e-05, - "loss": 0.5517, - "step": 100780 - }, - { - "epoch": 0.8910164606870702, - "grad_norm": 3.9565703868865967, - "learning_rate": 3.51497256552155e-05, - "loss": 0.7354, - "step": 100790 - }, - { - "epoch": 0.8911048639473824, - "grad_norm": 3.331211805343628, - "learning_rate": 3.514825226754363e-05, - "loss": 0.5486, - "step": 100800 - }, - { - "epoch": 0.8911932672076947, - "grad_norm": 3.1534969806671143, - "learning_rate": 3.514677887987176e-05, - "loss": 0.6251, - "step": 100810 - }, - { - "epoch": 0.8912816704680069, - "grad_norm": 0.961853563785553, - "learning_rate": 3.5145305492199886e-05, - "loss": 0.5399, - "step": 100820 - }, - { - "epoch": 0.8913700737283191, - "grad_norm": 0.6532862782478333, - "learning_rate": 3.514383210452802e-05, - "loss": 0.5547, - "step": 100830 - }, - { - "epoch": 0.8914584769886313, - "grad_norm": 1.2637436389923096, - "learning_rate": 3.514235871685614e-05, - "loss": 0.7035, - "step": 100840 - }, - { - "epoch": 0.8915468802489436, - "grad_norm": 2.0397205352783203, - "learning_rate": 3.514088532918428e-05, - "loss": 0.5815, - "step": 100850 - }, - { - "epoch": 0.8916352835092558, - "grad_norm": 4.124575614929199, - "learning_rate": 3.5139411941512406e-05, - "loss": 0.6324, - "step": 100860 - }, - { - "epoch": 0.891723686769568, - "grad_norm": 7.4020094871521, - "learning_rate": 3.5137938553840534e-05, - "loss": 0.6288, - "step": 100870 - }, - { - "epoch": 0.8918120900298803, - "grad_norm": 5.552517414093018, - "learning_rate": 3.513646516616866e-05, - "loss": 0.6562, - "step": 100880 - }, - { - "epoch": 0.8919004932901925, - "grad_norm": 1.8676708936691284, - "learning_rate": 3.51349917784968e-05, - "loss": 0.9967, - "step": 100890 - }, - { - "epoch": 0.8919888965505047, - "grad_norm": 3.641599416732788, - "learning_rate": 3.513351839082492e-05, - "loss": 0.5815, - "step": 100900 - }, - { - "epoch": 0.8920772998108171, - "grad_norm": 2.785343647003174, - "learning_rate": 3.5132045003153055e-05, - "loss": 0.707, - "step": 100910 - }, - { - "epoch": 0.8921657030711293, - "grad_norm": 4.495552062988281, - "learning_rate": 3.5130571615481176e-05, - "loss": 0.6783, - "step": 100920 - }, - { - "epoch": 0.8922541063314415, - "grad_norm": 6.22516393661499, - "learning_rate": 3.512909822780931e-05, - "loss": 0.5645, - "step": 100930 - }, - { - "epoch": 0.8923425095917538, - "grad_norm": 2.9545347690582275, - "learning_rate": 3.512762484013744e-05, - "loss": 0.5692, - "step": 100940 - }, - { - "epoch": 0.892430912852066, - "grad_norm": 8.589646339416504, - "learning_rate": 3.512615145246557e-05, - "loss": 0.6504, - "step": 100950 - }, - { - "epoch": 0.8925193161123782, - "grad_norm": 7.866554260253906, - "learning_rate": 3.5124678064793696e-05, - "loss": 0.6609, - "step": 100960 - }, - { - "epoch": 0.8926077193726905, - "grad_norm": 1.5530755519866943, - "learning_rate": 3.512320467712183e-05, - "loss": 0.722, - "step": 100970 - }, - { - "epoch": 0.8926961226330027, - "grad_norm": 1.5333573818206787, - "learning_rate": 3.512173128944995e-05, - "loss": 0.6719, - "step": 100980 - }, - { - "epoch": 0.8927845258933149, - "grad_norm": 1.0072578191757202, - "learning_rate": 3.512025790177809e-05, - "loss": 0.5213, - "step": 100990 - }, - { - "epoch": 0.8928729291536271, - "grad_norm": 10.758806228637695, - "learning_rate": 3.5118784514106217e-05, - "loss": 0.6005, - "step": 101000 - }, - { - "epoch": 0.8929613324139394, - "grad_norm": 3.3677430152893066, - "learning_rate": 3.5117311126434345e-05, - "loss": 0.5697, - "step": 101010 - }, - { - "epoch": 0.8930497356742517, - "grad_norm": 1.2367812395095825, - "learning_rate": 3.511583773876247e-05, - "loss": 0.5482, - "step": 101020 - }, - { - "epoch": 0.893138138934564, - "grad_norm": 1.904903769493103, - "learning_rate": 3.51143643510906e-05, - "loss": 0.6816, - "step": 101030 - }, - { - "epoch": 0.8932265421948762, - "grad_norm": 3.000394105911255, - "learning_rate": 3.511289096341873e-05, - "loss": 0.7403, - "step": 101040 - }, - { - "epoch": 0.8933149454551884, - "grad_norm": 1.9283185005187988, - "learning_rate": 3.5111417575746865e-05, - "loss": 0.6826, - "step": 101050 - }, - { - "epoch": 0.8934033487155006, - "grad_norm": 3.345632791519165, - "learning_rate": 3.510994418807499e-05, - "loss": 0.6579, - "step": 101060 - }, - { - "epoch": 0.8934917519758129, - "grad_norm": 2.5902278423309326, - "learning_rate": 3.510847080040312e-05, - "loss": 0.6711, - "step": 101070 - }, - { - "epoch": 0.8935801552361251, - "grad_norm": 2.029496669769287, - "learning_rate": 3.510699741273125e-05, - "loss": 0.6894, - "step": 101080 - }, - { - "epoch": 0.8936685584964373, - "grad_norm": 1.384575366973877, - "learning_rate": 3.510552402505938e-05, - "loss": 0.6912, - "step": 101090 - }, - { - "epoch": 0.8937569617567496, - "grad_norm": 3.0527894496917725, - "learning_rate": 3.510405063738751e-05, - "loss": 0.7272, - "step": 101100 - }, - { - "epoch": 0.8938453650170618, - "grad_norm": 11.757392883300781, - "learning_rate": 3.510257724971564e-05, - "loss": 0.5911, - "step": 101110 - }, - { - "epoch": 0.893933768277374, - "grad_norm": 1.9908053874969482, - "learning_rate": 3.5101103862043764e-05, - "loss": 0.7402, - "step": 101120 - }, - { - "epoch": 0.8940221715376863, - "grad_norm": 1.9703317880630493, - "learning_rate": 3.50996304743719e-05, - "loss": 0.687, - "step": 101130 - }, - { - "epoch": 0.8941105747979986, - "grad_norm": 3.663726329803467, - "learning_rate": 3.509815708670002e-05, - "loss": 0.6811, - "step": 101140 - }, - { - "epoch": 0.8941989780583108, - "grad_norm": 4.681532859802246, - "learning_rate": 3.5096683699028155e-05, - "loss": 0.6492, - "step": 101150 - }, - { - "epoch": 0.8942873813186231, - "grad_norm": 3.539275884628296, - "learning_rate": 3.5095210311356284e-05, - "loss": 0.6097, - "step": 101160 - }, - { - "epoch": 0.8943757845789353, - "grad_norm": 5.129672527313232, - "learning_rate": 3.509373692368441e-05, - "loss": 0.6826, - "step": 101170 - }, - { - "epoch": 0.8944641878392475, - "grad_norm": 2.5532431602478027, - "learning_rate": 3.509226353601254e-05, - "loss": 0.6332, - "step": 101180 - }, - { - "epoch": 0.8945525910995598, - "grad_norm": 16.827884674072266, - "learning_rate": 3.5090790148340676e-05, - "loss": 0.6399, - "step": 101190 - }, - { - "epoch": 0.894640994359872, - "grad_norm": 8.727770805358887, - "learning_rate": 3.50893167606688e-05, - "loss": 0.5987, - "step": 101200 - }, - { - "epoch": 0.8947293976201842, - "grad_norm": 0.9801639914512634, - "learning_rate": 3.508784337299693e-05, - "loss": 0.6697, - "step": 101210 - }, - { - "epoch": 0.8948178008804965, - "grad_norm": 1.7317551374435425, - "learning_rate": 3.508636998532506e-05, - "loss": 0.7046, - "step": 101220 - }, - { - "epoch": 0.8949062041408087, - "grad_norm": 1.1599980592727661, - "learning_rate": 3.508489659765319e-05, - "loss": 0.6924, - "step": 101230 - }, - { - "epoch": 0.8949946074011209, - "grad_norm": 6.136709213256836, - "learning_rate": 3.508342320998132e-05, - "loss": 0.6045, - "step": 101240 - }, - { - "epoch": 0.8950830106614331, - "grad_norm": 2.312178373336792, - "learning_rate": 3.508194982230945e-05, - "loss": 0.5173, - "step": 101250 - }, - { - "epoch": 0.8951714139217455, - "grad_norm": 5.154534816741943, - "learning_rate": 3.5080476434637574e-05, - "loss": 0.574, - "step": 101260 - }, - { - "epoch": 0.8952598171820577, - "grad_norm": 7.279311180114746, - "learning_rate": 3.507900304696571e-05, - "loss": 0.7546, - "step": 101270 - }, - { - "epoch": 0.89534822044237, - "grad_norm": 1.5704989433288574, - "learning_rate": 3.507752965929383e-05, - "loss": 0.5996, - "step": 101280 - }, - { - "epoch": 0.8954366237026822, - "grad_norm": 1.6563899517059326, - "learning_rate": 3.5076056271621966e-05, - "loss": 0.6485, - "step": 101290 - }, - { - "epoch": 0.8955250269629944, - "grad_norm": 2.398397207260132, - "learning_rate": 3.5074582883950094e-05, - "loss": 0.6093, - "step": 101300 - }, - { - "epoch": 0.8956134302233066, - "grad_norm": 2.6400833129882812, - "learning_rate": 3.507310949627822e-05, - "loss": 0.6433, - "step": 101310 - }, - { - "epoch": 0.8957018334836189, - "grad_norm": 1.1053946018218994, - "learning_rate": 3.507163610860635e-05, - "loss": 0.6295, - "step": 101320 - }, - { - "epoch": 0.8957902367439311, - "grad_norm": 2.3099589347839355, - "learning_rate": 3.5070162720934486e-05, - "loss": 0.6705, - "step": 101330 - }, - { - "epoch": 0.8958786400042433, - "grad_norm": 4.429075717926025, - "learning_rate": 3.506868933326261e-05, - "loss": 0.6963, - "step": 101340 - }, - { - "epoch": 0.8959670432645556, - "grad_norm": 2.8399767875671387, - "learning_rate": 3.506721594559074e-05, - "loss": 0.6676, - "step": 101350 - }, - { - "epoch": 0.8960554465248678, - "grad_norm": 2.4658477306365967, - "learning_rate": 3.506574255791887e-05, - "loss": 0.7333, - "step": 101360 - }, - { - "epoch": 0.89614384978518, - "grad_norm": 15.264657974243164, - "learning_rate": 3.5064269170247e-05, - "loss": 0.6424, - "step": 101370 - }, - { - "epoch": 0.8962322530454924, - "grad_norm": 8.383511543273926, - "learning_rate": 3.506279578257513e-05, - "loss": 0.6973, - "step": 101380 - }, - { - "epoch": 0.8963206563058046, - "grad_norm": 2.5382955074310303, - "learning_rate": 3.5061322394903256e-05, - "loss": 0.8195, - "step": 101390 - }, - { - "epoch": 0.8964090595661168, - "grad_norm": 9.689706802368164, - "learning_rate": 3.5059849007231385e-05, - "loss": 0.67, - "step": 101400 - }, - { - "epoch": 0.8964974628264291, - "grad_norm": 2.7874510288238525, - "learning_rate": 3.505837561955952e-05, - "loss": 0.7002, - "step": 101410 - }, - { - "epoch": 0.8965858660867413, - "grad_norm": 4.352518558502197, - "learning_rate": 3.505690223188765e-05, - "loss": 0.7201, - "step": 101420 - }, - { - "epoch": 0.8966742693470535, - "grad_norm": 3.670970916748047, - "learning_rate": 3.5055428844215776e-05, - "loss": 0.6365, - "step": 101430 - }, - { - "epoch": 0.8967626726073658, - "grad_norm": 2.897301197052002, - "learning_rate": 3.5053955456543905e-05, - "loss": 0.5498, - "step": 101440 - }, - { - "epoch": 0.896851075867678, - "grad_norm": 2.6691713333129883, - "learning_rate": 3.505248206887203e-05, - "loss": 0.5476, - "step": 101450 - }, - { - "epoch": 0.8969394791279902, - "grad_norm": 1.6963719129562378, - "learning_rate": 3.505100868120016e-05, - "loss": 0.7333, - "step": 101460 - }, - { - "epoch": 0.8970278823883024, - "grad_norm": 1.7317208051681519, - "learning_rate": 3.5049535293528297e-05, - "loss": 0.6961, - "step": 101470 - }, - { - "epoch": 0.8971162856486147, - "grad_norm": 2.3766930103302, - "learning_rate": 3.5048061905856425e-05, - "loss": 0.7015, - "step": 101480 - }, - { - "epoch": 0.8972046889089269, - "grad_norm": 0.9640777707099915, - "learning_rate": 3.504658851818455e-05, - "loss": 0.6253, - "step": 101490 - }, - { - "epoch": 0.8972930921692392, - "grad_norm": 5.681628704071045, - "learning_rate": 3.504511513051268e-05, - "loss": 0.636, - "step": 101500 - }, - { - "epoch": 0.8973814954295515, - "grad_norm": 3.1678149700164795, - "learning_rate": 3.504364174284081e-05, - "loss": 0.589, - "step": 101510 - }, - { - "epoch": 0.8974698986898637, - "grad_norm": 3.6972336769104004, - "learning_rate": 3.504216835516894e-05, - "loss": 0.5261, - "step": 101520 - }, - { - "epoch": 0.8975583019501759, - "grad_norm": 1.8316400051116943, - "learning_rate": 3.504069496749707e-05, - "loss": 0.6979, - "step": 101530 - }, - { - "epoch": 0.8976467052104882, - "grad_norm": 8.349693298339844, - "learning_rate": 3.50392215798252e-05, - "loss": 0.6079, - "step": 101540 - }, - { - "epoch": 0.8977351084708004, - "grad_norm": 4.87383508682251, - "learning_rate": 3.503774819215333e-05, - "loss": 0.6064, - "step": 101550 - }, - { - "epoch": 0.8978235117311126, - "grad_norm": 10.393636703491211, - "learning_rate": 3.503627480448146e-05, - "loss": 0.7332, - "step": 101560 - }, - { - "epoch": 0.8979119149914249, - "grad_norm": 6.1662492752075195, - "learning_rate": 3.503480141680959e-05, - "loss": 0.6072, - "step": 101570 - }, - { - "epoch": 0.8980003182517371, - "grad_norm": 2.561657428741455, - "learning_rate": 3.5033328029137715e-05, - "loss": 0.7546, - "step": 101580 - }, - { - "epoch": 0.8980887215120493, - "grad_norm": 2.8024821281433105, - "learning_rate": 3.5031854641465844e-05, - "loss": 0.6235, - "step": 101590 - }, - { - "epoch": 0.8981771247723616, - "grad_norm": 2.377392053604126, - "learning_rate": 3.503038125379398e-05, - "loss": 0.5911, - "step": 101600 - }, - { - "epoch": 0.8982655280326739, - "grad_norm": 3.295403242111206, - "learning_rate": 3.50289078661221e-05, - "loss": 0.6485, - "step": 101610 - }, - { - "epoch": 0.8983539312929861, - "grad_norm": 0.9697879552841187, - "learning_rate": 3.5027434478450235e-05, - "loss": 0.684, - "step": 101620 - }, - { - "epoch": 0.8984423345532984, - "grad_norm": 5.126312732696533, - "learning_rate": 3.5025961090778364e-05, - "loss": 0.5952, - "step": 101630 - }, - { - "epoch": 0.8985307378136106, - "grad_norm": 10.524635314941406, - "learning_rate": 3.502448770310649e-05, - "loss": 0.8219, - "step": 101640 - }, - { - "epoch": 0.8986191410739228, - "grad_norm": 6.454860210418701, - "learning_rate": 3.502301431543462e-05, - "loss": 0.5776, - "step": 101650 - }, - { - "epoch": 0.898707544334235, - "grad_norm": 5.484509468078613, - "learning_rate": 3.5021540927762756e-05, - "loss": 0.6469, - "step": 101660 - }, - { - "epoch": 0.8987959475945473, - "grad_norm": 9.468827247619629, - "learning_rate": 3.502006754009088e-05, - "loss": 0.6623, - "step": 101670 - }, - { - "epoch": 0.8988843508548595, - "grad_norm": 3.075105667114258, - "learning_rate": 3.501859415241901e-05, - "loss": 0.7001, - "step": 101680 - }, - { - "epoch": 0.8989727541151717, - "grad_norm": 1.0968987941741943, - "learning_rate": 3.501712076474714e-05, - "loss": 0.6179, - "step": 101690 - }, - { - "epoch": 0.899061157375484, - "grad_norm": 3.3807880878448486, - "learning_rate": 3.501564737707527e-05, - "loss": 0.6345, - "step": 101700 - }, - { - "epoch": 0.8991495606357962, - "grad_norm": 7.7511067390441895, - "learning_rate": 3.50141739894034e-05, - "loss": 0.7584, - "step": 101710 - }, - { - "epoch": 0.8992379638961084, - "grad_norm": 3.7797470092773438, - "learning_rate": 3.501270060173153e-05, - "loss": 0.6759, - "step": 101720 - }, - { - "epoch": 0.8993263671564208, - "grad_norm": 2.490063190460205, - "learning_rate": 3.5011227214059654e-05, - "loss": 0.6778, - "step": 101730 - }, - { - "epoch": 0.899414770416733, - "grad_norm": 3.2917134761810303, - "learning_rate": 3.500975382638779e-05, - "loss": 0.7253, - "step": 101740 - }, - { - "epoch": 0.8995031736770452, - "grad_norm": 2.1594207286834717, - "learning_rate": 3.500828043871591e-05, - "loss": 0.6469, - "step": 101750 - }, - { - "epoch": 0.8995915769373575, - "grad_norm": 2.15397047996521, - "learning_rate": 3.5006807051044046e-05, - "loss": 0.6228, - "step": 101760 - }, - { - "epoch": 0.8996799801976697, - "grad_norm": 3.32759165763855, - "learning_rate": 3.5005333663372174e-05, - "loss": 0.6176, - "step": 101770 - }, - { - "epoch": 0.8997683834579819, - "grad_norm": 1.747042179107666, - "learning_rate": 3.50038602757003e-05, - "loss": 0.7409, - "step": 101780 - }, - { - "epoch": 0.8998567867182942, - "grad_norm": 3.225634813308716, - "learning_rate": 3.500238688802843e-05, - "loss": 0.7049, - "step": 101790 - }, - { - "epoch": 0.8999451899786064, - "grad_norm": 4.23330020904541, - "learning_rate": 3.5000913500356566e-05, - "loss": 0.6701, - "step": 101800 - }, - { - "epoch": 0.9000335932389186, - "grad_norm": 1.5757709741592407, - "learning_rate": 3.499944011268469e-05, - "loss": 0.5276, - "step": 101810 - }, - { - "epoch": 0.9001219964992309, - "grad_norm": 2.1911356449127197, - "learning_rate": 3.499796672501282e-05, - "loss": 0.6656, - "step": 101820 - }, - { - "epoch": 0.9002103997595431, - "grad_norm": 1.4769911766052246, - "learning_rate": 3.499649333734095e-05, - "loss": 0.5302, - "step": 101830 - }, - { - "epoch": 0.9002988030198553, - "grad_norm": 2.9458811283111572, - "learning_rate": 3.499501994966908e-05, - "loss": 0.5677, - "step": 101840 - }, - { - "epoch": 0.9003872062801677, - "grad_norm": 4.243816375732422, - "learning_rate": 3.499354656199721e-05, - "loss": 0.6722, - "step": 101850 - }, - { - "epoch": 0.9004756095404799, - "grad_norm": 2.830928087234497, - "learning_rate": 3.4992073174325336e-05, - "loss": 0.7675, - "step": 101860 - }, - { - "epoch": 0.9005640128007921, - "grad_norm": 7.463563919067383, - "learning_rate": 3.4990599786653465e-05, - "loss": 0.6429, - "step": 101870 - }, - { - "epoch": 0.9006524160611044, - "grad_norm": 3.8281073570251465, - "learning_rate": 3.49891263989816e-05, - "loss": 0.5814, - "step": 101880 - }, - { - "epoch": 0.9007408193214166, - "grad_norm": 2.409959554672241, - "learning_rate": 3.498765301130972e-05, - "loss": 0.7404, - "step": 101890 - }, - { - "epoch": 0.9008292225817288, - "grad_norm": 8.971677780151367, - "learning_rate": 3.4986179623637856e-05, - "loss": 0.6807, - "step": 101900 - }, - { - "epoch": 0.900917625842041, - "grad_norm": 5.7753472328186035, - "learning_rate": 3.4984706235965985e-05, - "loss": 0.6223, - "step": 101910 - }, - { - "epoch": 0.9010060291023533, - "grad_norm": 4.067429542541504, - "learning_rate": 3.498323284829411e-05, - "loss": 0.5396, - "step": 101920 - }, - { - "epoch": 0.9010944323626655, - "grad_norm": 1.4998195171356201, - "learning_rate": 3.498175946062224e-05, - "loss": 0.6694, - "step": 101930 - }, - { - "epoch": 0.9011828356229777, - "grad_norm": 3.7504825592041016, - "learning_rate": 3.498028607295038e-05, - "loss": 0.7412, - "step": 101940 - }, - { - "epoch": 0.90127123888329, - "grad_norm": 6.002006530761719, - "learning_rate": 3.49788126852785e-05, - "loss": 0.742, - "step": 101950 - }, - { - "epoch": 0.9013596421436022, - "grad_norm": 3.166353940963745, - "learning_rate": 3.497733929760663e-05, - "loss": 0.6574, - "step": 101960 - }, - { - "epoch": 0.9014480454039145, - "grad_norm": 8.615494728088379, - "learning_rate": 3.4975865909934755e-05, - "loss": 0.6139, - "step": 101970 - }, - { - "epoch": 0.9015364486642268, - "grad_norm": 13.972883224487305, - "learning_rate": 3.497439252226289e-05, - "loss": 0.6576, - "step": 101980 - }, - { - "epoch": 0.901624851924539, - "grad_norm": 2.6312336921691895, - "learning_rate": 3.497291913459102e-05, - "loss": 0.6305, - "step": 101990 - }, - { - "epoch": 0.9017132551848512, - "grad_norm": 12.213671684265137, - "learning_rate": 3.497144574691915e-05, - "loss": 0.7336, - "step": 102000 - }, - { - "epoch": 0.9018016584451635, - "grad_norm": 1.814104676246643, - "learning_rate": 3.4969972359247275e-05, - "loss": 0.505, - "step": 102010 - }, - { - "epoch": 0.9018900617054757, - "grad_norm": 1.0694621801376343, - "learning_rate": 3.496849897157541e-05, - "loss": 0.5508, - "step": 102020 - }, - { - "epoch": 0.9019784649657879, - "grad_norm": 1.7550774812698364, - "learning_rate": 3.496702558390353e-05, - "loss": 0.7223, - "step": 102030 - }, - { - "epoch": 0.9020668682261002, - "grad_norm": 4.128680229187012, - "learning_rate": 3.496555219623167e-05, - "loss": 0.586, - "step": 102040 - }, - { - "epoch": 0.9021552714864124, - "grad_norm": 4.750894546508789, - "learning_rate": 3.4964078808559795e-05, - "loss": 0.6127, - "step": 102050 - }, - { - "epoch": 0.9022436747467246, - "grad_norm": 4.454137802124023, - "learning_rate": 3.4962605420887924e-05, - "loss": 0.7838, - "step": 102060 - }, - { - "epoch": 0.9023320780070369, - "grad_norm": 2.62306547164917, - "learning_rate": 3.496113203321605e-05, - "loss": 0.721, - "step": 102070 - }, - { - "epoch": 0.9024204812673492, - "grad_norm": 2.284649133682251, - "learning_rate": 3.495965864554418e-05, - "loss": 0.6146, - "step": 102080 - }, - { - "epoch": 0.9025088845276614, - "grad_norm": 2.9690709114074707, - "learning_rate": 3.495818525787231e-05, - "loss": 0.5991, - "step": 102090 - }, - { - "epoch": 0.9025972877879737, - "grad_norm": 2.8630611896514893, - "learning_rate": 3.4956711870200444e-05, - "loss": 0.5894, - "step": 102100 - }, - { - "epoch": 0.9026856910482859, - "grad_norm": 5.967788219451904, - "learning_rate": 3.4955238482528565e-05, - "loss": 0.6866, - "step": 102110 - }, - { - "epoch": 0.9027740943085981, - "grad_norm": 1.9666553735733032, - "learning_rate": 3.49537650948567e-05, - "loss": 0.6933, - "step": 102120 - }, - { - "epoch": 0.9028624975689103, - "grad_norm": 1.8393679857254028, - "learning_rate": 3.495229170718483e-05, - "loss": 0.6108, - "step": 102130 - }, - { - "epoch": 0.9029509008292226, - "grad_norm": 2.8642807006835938, - "learning_rate": 3.495081831951296e-05, - "loss": 0.6309, - "step": 102140 - }, - { - "epoch": 0.9030393040895348, - "grad_norm": 1.292883276939392, - "learning_rate": 3.4949344931841086e-05, - "loss": 0.4453, - "step": 102150 - }, - { - "epoch": 0.903127707349847, - "grad_norm": 8.02357292175293, - "learning_rate": 3.494787154416922e-05, - "loss": 0.6735, - "step": 102160 - }, - { - "epoch": 0.9032161106101593, - "grad_norm": 9.84030818939209, - "learning_rate": 3.494639815649734e-05, - "loss": 0.5224, - "step": 102170 - }, - { - "epoch": 0.9033045138704715, - "grad_norm": 9.755715370178223, - "learning_rate": 3.494492476882548e-05, - "loss": 0.5777, - "step": 102180 - }, - { - "epoch": 0.9033929171307837, - "grad_norm": 2.8910040855407715, - "learning_rate": 3.4943451381153606e-05, - "loss": 0.8668, - "step": 102190 - }, - { - "epoch": 0.9034813203910961, - "grad_norm": 2.0276455879211426, - "learning_rate": 3.4941977993481734e-05, - "loss": 0.6185, - "step": 102200 - }, - { - "epoch": 0.9035697236514083, - "grad_norm": 5.360898017883301, - "learning_rate": 3.494050460580986e-05, - "loss": 0.6512, - "step": 102210 - }, - { - "epoch": 0.9036581269117205, - "grad_norm": 1.9048824310302734, - "learning_rate": 3.493903121813799e-05, - "loss": 0.5484, - "step": 102220 - }, - { - "epoch": 0.9037465301720328, - "grad_norm": 15.215572357177734, - "learning_rate": 3.493755783046612e-05, - "loss": 0.6705, - "step": 102230 - }, - { - "epoch": 0.903834933432345, - "grad_norm": 2.206956386566162, - "learning_rate": 3.4936084442794254e-05, - "loss": 0.6876, - "step": 102240 - }, - { - "epoch": 0.9039233366926572, - "grad_norm": 2.6515204906463623, - "learning_rate": 3.4934611055122376e-05, - "loss": 0.5921, - "step": 102250 - }, - { - "epoch": 0.9040117399529695, - "grad_norm": 4.575997829437256, - "learning_rate": 3.493313766745051e-05, - "loss": 0.5741, - "step": 102260 - }, - { - "epoch": 0.9041001432132817, - "grad_norm": 8.7185697555542, - "learning_rate": 3.493166427977864e-05, - "loss": 0.5363, - "step": 102270 - }, - { - "epoch": 0.9041885464735939, - "grad_norm": 1.7065107822418213, - "learning_rate": 3.493019089210677e-05, - "loss": 0.5292, - "step": 102280 - }, - { - "epoch": 0.9042769497339062, - "grad_norm": 3.5478756427764893, - "learning_rate": 3.4928717504434896e-05, - "loss": 0.5829, - "step": 102290 - }, - { - "epoch": 0.9043653529942184, - "grad_norm": 2.1791701316833496, - "learning_rate": 3.492724411676303e-05, - "loss": 0.5794, - "step": 102300 - }, - { - "epoch": 0.9044537562545306, - "grad_norm": 4.497156143188477, - "learning_rate": 3.492577072909115e-05, - "loss": 0.5913, - "step": 102310 - }, - { - "epoch": 0.904542159514843, - "grad_norm": 2.201439142227173, - "learning_rate": 3.492429734141929e-05, - "loss": 0.7369, - "step": 102320 - }, - { - "epoch": 0.9046305627751552, - "grad_norm": 15.413252830505371, - "learning_rate": 3.4922823953747416e-05, - "loss": 0.5469, - "step": 102330 - }, - { - "epoch": 0.9047189660354674, - "grad_norm": 9.893781661987305, - "learning_rate": 3.4921350566075545e-05, - "loss": 0.5994, - "step": 102340 - }, - { - "epoch": 0.9048073692957797, - "grad_norm": 6.480618953704834, - "learning_rate": 3.491987717840367e-05, - "loss": 0.5544, - "step": 102350 - }, - { - "epoch": 0.9048957725560919, - "grad_norm": 7.362419605255127, - "learning_rate": 3.49184037907318e-05, - "loss": 0.7763, - "step": 102360 - }, - { - "epoch": 0.9049841758164041, - "grad_norm": 1.6436314582824707, - "learning_rate": 3.491693040305993e-05, - "loss": 0.6397, - "step": 102370 - }, - { - "epoch": 0.9050725790767163, - "grad_norm": 0.9055808782577515, - "learning_rate": 3.4915457015388065e-05, - "loss": 0.5555, - "step": 102380 - }, - { - "epoch": 0.9051609823370286, - "grad_norm": 1.5474635362625122, - "learning_rate": 3.491398362771619e-05, - "loss": 0.648, - "step": 102390 - }, - { - "epoch": 0.9052493855973408, - "grad_norm": 1.456683874130249, - "learning_rate": 3.491251024004432e-05, - "loss": 0.7182, - "step": 102400 - }, - { - "epoch": 0.905337788857653, - "grad_norm": 6.114360809326172, - "learning_rate": 3.491103685237245e-05, - "loss": 0.5114, - "step": 102410 - }, - { - "epoch": 0.9054261921179653, - "grad_norm": 1.343212366104126, - "learning_rate": 3.490956346470058e-05, - "loss": 0.5927, - "step": 102420 - }, - { - "epoch": 0.9055145953782775, - "grad_norm": 2.227585792541504, - "learning_rate": 3.490809007702871e-05, - "loss": 0.5756, - "step": 102430 - }, - { - "epoch": 0.9056029986385898, - "grad_norm": 2.8722617626190186, - "learning_rate": 3.4906616689356835e-05, - "loss": 0.7209, - "step": 102440 - }, - { - "epoch": 0.9056914018989021, - "grad_norm": 4.4949212074279785, - "learning_rate": 3.490514330168497e-05, - "loss": 0.6218, - "step": 102450 - }, - { - "epoch": 0.9057798051592143, - "grad_norm": 8.657486915588379, - "learning_rate": 3.49036699140131e-05, - "loss": 0.6459, - "step": 102460 - }, - { - "epoch": 0.9058682084195265, - "grad_norm": 2.9187140464782715, - "learning_rate": 3.490219652634123e-05, - "loss": 0.557, - "step": 102470 - }, - { - "epoch": 0.9059566116798388, - "grad_norm": 8.384116172790527, - "learning_rate": 3.4900723138669355e-05, - "loss": 0.7111, - "step": 102480 - }, - { - "epoch": 0.906045014940151, - "grad_norm": 3.899387836456299, - "learning_rate": 3.4899249750997484e-05, - "loss": 0.5597, - "step": 102490 - }, - { - "epoch": 0.9061334182004632, - "grad_norm": 5.011263847351074, - "learning_rate": 3.489777636332561e-05, - "loss": 0.6535, - "step": 102500 - }, - { - "epoch": 0.9062218214607755, - "grad_norm": 1.0646131038665771, - "learning_rate": 3.489630297565375e-05, - "loss": 0.6902, - "step": 102510 - }, - { - "epoch": 0.9063102247210877, - "grad_norm": 2.9479987621307373, - "learning_rate": 3.4894829587981875e-05, - "loss": 0.6589, - "step": 102520 - }, - { - "epoch": 0.9063986279813999, - "grad_norm": 3.5728771686553955, - "learning_rate": 3.4893356200310004e-05, - "loss": 0.529, - "step": 102530 - }, - { - "epoch": 0.9064870312417121, - "grad_norm": 1.4704993963241577, - "learning_rate": 3.489188281263813e-05, - "loss": 0.7254, - "step": 102540 - }, - { - "epoch": 0.9065754345020244, - "grad_norm": 4.313514709472656, - "learning_rate": 3.489040942496626e-05, - "loss": 0.5275, - "step": 102550 - }, - { - "epoch": 0.9066638377623367, - "grad_norm": 4.278264045715332, - "learning_rate": 3.488893603729439e-05, - "loss": 0.7614, - "step": 102560 - }, - { - "epoch": 0.906752241022649, - "grad_norm": 3.022684335708618, - "learning_rate": 3.4887462649622524e-05, - "loss": 0.584, - "step": 102570 - }, - { - "epoch": 0.9068406442829612, - "grad_norm": 2.240894317626953, - "learning_rate": 3.4885989261950646e-05, - "loss": 0.7421, - "step": 102580 - }, - { - "epoch": 0.9069290475432734, - "grad_norm": 2.047661781311035, - "learning_rate": 3.488451587427878e-05, - "loss": 0.6114, - "step": 102590 - }, - { - "epoch": 0.9070174508035856, - "grad_norm": 5.01424503326416, - "learning_rate": 3.488304248660691e-05, - "loss": 0.6426, - "step": 102600 - }, - { - "epoch": 0.9071058540638979, - "grad_norm": 1.331560730934143, - "learning_rate": 3.488156909893504e-05, - "loss": 0.7314, - "step": 102610 - }, - { - "epoch": 0.9071942573242101, - "grad_norm": 1.643265724182129, - "learning_rate": 3.4880095711263166e-05, - "loss": 0.7225, - "step": 102620 - }, - { - "epoch": 0.9072826605845223, - "grad_norm": 5.257772445678711, - "learning_rate": 3.48786223235913e-05, - "loss": 0.6232, - "step": 102630 - }, - { - "epoch": 0.9073710638448346, - "grad_norm": 4.28574800491333, - "learning_rate": 3.487714893591942e-05, - "loss": 0.6348, - "step": 102640 - }, - { - "epoch": 0.9074594671051468, - "grad_norm": 1.86776864528656, - "learning_rate": 3.487567554824756e-05, - "loss": 0.6368, - "step": 102650 - }, - { - "epoch": 0.907547870365459, - "grad_norm": 5.235930919647217, - "learning_rate": 3.4874202160575686e-05, - "loss": 0.6792, - "step": 102660 - }, - { - "epoch": 0.9076362736257714, - "grad_norm": 3.324723243713379, - "learning_rate": 3.4872728772903814e-05, - "loss": 0.6396, - "step": 102670 - }, - { - "epoch": 0.9077246768860836, - "grad_norm": 3.4838247299194336, - "learning_rate": 3.487125538523194e-05, - "loss": 0.791, - "step": 102680 - }, - { - "epoch": 0.9078130801463958, - "grad_norm": 3.4456939697265625, - "learning_rate": 3.486978199756007e-05, - "loss": 0.6986, - "step": 102690 - }, - { - "epoch": 0.9079014834067081, - "grad_norm": 3.2096827030181885, - "learning_rate": 3.48683086098882e-05, - "loss": 0.6852, - "step": 102700 - }, - { - "epoch": 0.9079898866670203, - "grad_norm": 8.984938621520996, - "learning_rate": 3.4866835222216334e-05, - "loss": 0.7243, - "step": 102710 - }, - { - "epoch": 0.9080782899273325, - "grad_norm": 2.8154518604278564, - "learning_rate": 3.4865361834544456e-05, - "loss": 0.6874, - "step": 102720 - }, - { - "epoch": 0.9081666931876448, - "grad_norm": 1.7001768350601196, - "learning_rate": 3.486388844687259e-05, - "loss": 0.5491, - "step": 102730 - }, - { - "epoch": 0.908255096447957, - "grad_norm": 3.0020084381103516, - "learning_rate": 3.486241505920072e-05, - "loss": 0.6214, - "step": 102740 - }, - { - "epoch": 0.9083434997082692, - "grad_norm": 1.2392884492874146, - "learning_rate": 3.486094167152885e-05, - "loss": 0.6687, - "step": 102750 - }, - { - "epoch": 0.9084319029685815, - "grad_norm": 2.5042834281921387, - "learning_rate": 3.4859468283856976e-05, - "loss": 0.7043, - "step": 102760 - }, - { - "epoch": 0.9085203062288937, - "grad_norm": 2.7333014011383057, - "learning_rate": 3.485799489618511e-05, - "loss": 0.7307, - "step": 102770 - }, - { - "epoch": 0.9086087094892059, - "grad_norm": 2.7315826416015625, - "learning_rate": 3.485652150851323e-05, - "loss": 0.6583, - "step": 102780 - }, - { - "epoch": 0.9086971127495183, - "grad_norm": 6.964535713195801, - "learning_rate": 3.485504812084137e-05, - "loss": 0.5731, - "step": 102790 - }, - { - "epoch": 0.9087855160098305, - "grad_norm": 2.9444174766540527, - "learning_rate": 3.485357473316949e-05, - "loss": 0.7838, - "step": 102800 - }, - { - "epoch": 0.9088739192701427, - "grad_norm": 3.931525707244873, - "learning_rate": 3.4852101345497625e-05, - "loss": 0.639, - "step": 102810 - }, - { - "epoch": 0.908962322530455, - "grad_norm": 2.748145818710327, - "learning_rate": 3.485062795782575e-05, - "loss": 0.6461, - "step": 102820 - }, - { - "epoch": 0.9090507257907672, - "grad_norm": 8.574252128601074, - "learning_rate": 3.484915457015388e-05, - "loss": 0.7263, - "step": 102830 - }, - { - "epoch": 0.9091391290510794, - "grad_norm": 9.556109428405762, - "learning_rate": 3.484768118248201e-05, - "loss": 0.5527, - "step": 102840 - }, - { - "epoch": 0.9092275323113916, - "grad_norm": 0.9728810787200928, - "learning_rate": 3.4846207794810145e-05, - "loss": 0.5305, - "step": 102850 - }, - { - "epoch": 0.9093159355717039, - "grad_norm": 6.62507438659668, - "learning_rate": 3.4844734407138267e-05, - "loss": 0.5505, - "step": 102860 - }, - { - "epoch": 0.9094043388320161, - "grad_norm": 4.307148456573486, - "learning_rate": 3.48432610194664e-05, - "loss": 0.7932, - "step": 102870 - }, - { - "epoch": 0.9094927420923283, - "grad_norm": 1.1886510848999023, - "learning_rate": 3.484178763179453e-05, - "loss": 0.5498, - "step": 102880 - }, - { - "epoch": 0.9095811453526406, - "grad_norm": 4.167265892028809, - "learning_rate": 3.484031424412266e-05, - "loss": 0.6188, - "step": 102890 - }, - { - "epoch": 0.9096695486129528, - "grad_norm": 13.898412704467773, - "learning_rate": 3.483884085645079e-05, - "loss": 0.5631, - "step": 102900 - }, - { - "epoch": 0.9097579518732651, - "grad_norm": 1.6460766792297363, - "learning_rate": 3.4837367468778915e-05, - "loss": 0.6439, - "step": 102910 - }, - { - "epoch": 0.9098463551335774, - "grad_norm": 1.5120488405227661, - "learning_rate": 3.4835894081107043e-05, - "loss": 0.7586, - "step": 102920 - }, - { - "epoch": 0.9099347583938896, - "grad_norm": 1.4673289060592651, - "learning_rate": 3.483442069343518e-05, - "loss": 0.7212, - "step": 102930 - }, - { - "epoch": 0.9100231616542018, - "grad_norm": 9.395055770874023, - "learning_rate": 3.48329473057633e-05, - "loss": 0.7843, - "step": 102940 - }, - { - "epoch": 0.9101115649145141, - "grad_norm": 1.9193661212921143, - "learning_rate": 3.4831473918091435e-05, - "loss": 0.6455, - "step": 102950 - }, - { - "epoch": 0.9101999681748263, - "grad_norm": 7.308329105377197, - "learning_rate": 3.4830000530419564e-05, - "loss": 0.7136, - "step": 102960 - }, - { - "epoch": 0.9102883714351385, - "grad_norm": 5.894074440002441, - "learning_rate": 3.482852714274769e-05, - "loss": 0.5992, - "step": 102970 - }, - { - "epoch": 0.9103767746954508, - "grad_norm": 5.067314147949219, - "learning_rate": 3.482705375507582e-05, - "loss": 0.6705, - "step": 102980 - }, - { - "epoch": 0.910465177955763, - "grad_norm": 2.852985143661499, - "learning_rate": 3.4825580367403955e-05, - "loss": 0.7311, - "step": 102990 - }, - { - "epoch": 0.9105535812160752, - "grad_norm": 1.2534308433532715, - "learning_rate": 3.482410697973208e-05, - "loss": 0.6228, - "step": 103000 - }, - { - "epoch": 0.9106419844763874, - "grad_norm": 5.065445899963379, - "learning_rate": 3.482263359206021e-05, - "loss": 0.5424, - "step": 103010 - }, - { - "epoch": 0.9107303877366997, - "grad_norm": 1.254294991493225, - "learning_rate": 3.482116020438834e-05, - "loss": 0.6666, - "step": 103020 - }, - { - "epoch": 0.910818790997012, - "grad_norm": 3.0075409412384033, - "learning_rate": 3.481968681671647e-05, - "loss": 0.6205, - "step": 103030 - }, - { - "epoch": 0.9109071942573242, - "grad_norm": 1.6168415546417236, - "learning_rate": 3.48182134290446e-05, - "loss": 0.6119, - "step": 103040 - }, - { - "epoch": 0.9109955975176365, - "grad_norm": 2.2849974632263184, - "learning_rate": 3.4816740041372726e-05, - "loss": 0.548, - "step": 103050 - }, - { - "epoch": 0.9110840007779487, - "grad_norm": 1.2676775455474854, - "learning_rate": 3.4815266653700854e-05, - "loss": 0.7602, - "step": 103060 - }, - { - "epoch": 0.9111724040382609, - "grad_norm": 7.243632793426514, - "learning_rate": 3.481379326602899e-05, - "loss": 0.5948, - "step": 103070 - }, - { - "epoch": 0.9112608072985732, - "grad_norm": 1.9729849100112915, - "learning_rate": 3.481231987835711e-05, - "loss": 0.5694, - "step": 103080 - }, - { - "epoch": 0.9113492105588854, - "grad_norm": 2.3196303844451904, - "learning_rate": 3.4810846490685246e-05, - "loss": 0.6395, - "step": 103090 - }, - { - "epoch": 0.9114376138191976, - "grad_norm": 8.909586906433105, - "learning_rate": 3.4809373103013374e-05, - "loss": 0.6553, - "step": 103100 - }, - { - "epoch": 0.9115260170795099, - "grad_norm": 2.7305219173431396, - "learning_rate": 3.48078997153415e-05, - "loss": 0.5914, - "step": 103110 - }, - { - "epoch": 0.9116144203398221, - "grad_norm": 12.29466438293457, - "learning_rate": 3.480642632766963e-05, - "loss": 0.7054, - "step": 103120 - }, - { - "epoch": 0.9117028236001343, - "grad_norm": 1.8778612613677979, - "learning_rate": 3.4804952939997766e-05, - "loss": 0.7085, - "step": 103130 - }, - { - "epoch": 0.9117912268604467, - "grad_norm": 23.512847900390625, - "learning_rate": 3.480347955232589e-05, - "loss": 0.6218, - "step": 103140 - }, - { - "epoch": 0.9118796301207589, - "grad_norm": 6.219461441040039, - "learning_rate": 3.480200616465402e-05, - "loss": 0.795, - "step": 103150 - }, - { - "epoch": 0.9119680333810711, - "grad_norm": 10.840205192565918, - "learning_rate": 3.4800532776982144e-05, - "loss": 0.675, - "step": 103160 - }, - { - "epoch": 0.9120564366413834, - "grad_norm": 3.672513723373413, - "learning_rate": 3.479905938931028e-05, - "loss": 0.7038, - "step": 103170 - }, - { - "epoch": 0.9121448399016956, - "grad_norm": 4.021480560302734, - "learning_rate": 3.479758600163841e-05, - "loss": 0.6334, - "step": 103180 - }, - { - "epoch": 0.9122332431620078, - "grad_norm": 1.8859593868255615, - "learning_rate": 3.4796112613966536e-05, - "loss": 0.6976, - "step": 103190 - }, - { - "epoch": 0.91232164642232, - "grad_norm": 2.477224826812744, - "learning_rate": 3.4794639226294664e-05, - "loss": 0.7565, - "step": 103200 - }, - { - "epoch": 0.9124100496826323, - "grad_norm": 3.812579870223999, - "learning_rate": 3.47931658386228e-05, - "loss": 0.6198, - "step": 103210 - }, - { - "epoch": 0.9124984529429445, - "grad_norm": 7.9026970863342285, - "learning_rate": 3.479169245095092e-05, - "loss": 0.582, - "step": 103220 - }, - { - "epoch": 0.9125868562032567, - "grad_norm": 2.329782009124756, - "learning_rate": 3.4790219063279056e-05, - "loss": 0.6175, - "step": 103230 - }, - { - "epoch": 0.912675259463569, - "grad_norm": 2.1884875297546387, - "learning_rate": 3.4788745675607185e-05, - "loss": 0.5448, - "step": 103240 - }, - { - "epoch": 0.9127636627238812, - "grad_norm": 3.888169288635254, - "learning_rate": 3.478727228793531e-05, - "loss": 0.7435, - "step": 103250 - }, - { - "epoch": 0.9128520659841936, - "grad_norm": 6.73412561416626, - "learning_rate": 3.478579890026344e-05, - "loss": 0.6516, - "step": 103260 - }, - { - "epoch": 0.9129404692445058, - "grad_norm": 5.661271572113037, - "learning_rate": 3.478432551259157e-05, - "loss": 0.5616, - "step": 103270 - }, - { - "epoch": 0.913028872504818, - "grad_norm": 5.905777931213379, - "learning_rate": 3.4782852124919705e-05, - "loss": 0.5896, - "step": 103280 - }, - { - "epoch": 0.9131172757651302, - "grad_norm": 1.270121693611145, - "learning_rate": 3.478137873724783e-05, - "loss": 0.5912, - "step": 103290 - }, - { - "epoch": 0.9132056790254425, - "grad_norm": 3.8103644847869873, - "learning_rate": 3.477990534957596e-05, - "loss": 0.6009, - "step": 103300 - }, - { - "epoch": 0.9132940822857547, - "grad_norm": 21.119020462036133, - "learning_rate": 3.477843196190409e-05, - "loss": 0.636, - "step": 103310 - }, - { - "epoch": 0.9133824855460669, - "grad_norm": 4.260202407836914, - "learning_rate": 3.477695857423222e-05, - "loss": 0.6174, - "step": 103320 - }, - { - "epoch": 0.9134708888063792, - "grad_norm": 1.796023964881897, - "learning_rate": 3.477548518656035e-05, - "loss": 0.6095, - "step": 103330 - }, - { - "epoch": 0.9135592920666914, - "grad_norm": 1.3518072366714478, - "learning_rate": 3.477401179888848e-05, - "loss": 0.6585, - "step": 103340 - }, - { - "epoch": 0.9136476953270036, - "grad_norm": 6.710455417633057, - "learning_rate": 3.477253841121661e-05, - "loss": 0.6919, - "step": 103350 - }, - { - "epoch": 0.9137360985873159, - "grad_norm": 1.6725014448165894, - "learning_rate": 3.477106502354474e-05, - "loss": 0.6039, - "step": 103360 - }, - { - "epoch": 0.9138245018476281, - "grad_norm": 5.248466968536377, - "learning_rate": 3.476959163587287e-05, - "loss": 0.6507, - "step": 103370 - }, - { - "epoch": 0.9139129051079404, - "grad_norm": 3.784817695617676, - "learning_rate": 3.4768118248200995e-05, - "loss": 0.6993, - "step": 103380 - }, - { - "epoch": 0.9140013083682527, - "grad_norm": 6.656286716461182, - "learning_rate": 3.4766644860529124e-05, - "loss": 0.6885, - "step": 103390 - }, - { - "epoch": 0.9140897116285649, - "grad_norm": 6.79155969619751, - "learning_rate": 3.476517147285726e-05, - "loss": 0.6063, - "step": 103400 - }, - { - "epoch": 0.9141781148888771, - "grad_norm": 3.4230575561523438, - "learning_rate": 3.476369808518538e-05, - "loss": 0.754, - "step": 103410 - }, - { - "epoch": 0.9142665181491894, - "grad_norm": 1.6727087497711182, - "learning_rate": 3.4762224697513515e-05, - "loss": 0.5201, - "step": 103420 - }, - { - "epoch": 0.9143549214095016, - "grad_norm": 2.6733951568603516, - "learning_rate": 3.4760751309841644e-05, - "loss": 0.5719, - "step": 103430 - }, - { - "epoch": 0.9144433246698138, - "grad_norm": 2.1525189876556396, - "learning_rate": 3.475927792216977e-05, - "loss": 0.7658, - "step": 103440 - }, - { - "epoch": 0.914531727930126, - "grad_norm": 6.747961521148682, - "learning_rate": 3.47578045344979e-05, - "loss": 0.6899, - "step": 103450 - }, - { - "epoch": 0.9146201311904383, - "grad_norm": 10.989623069763184, - "learning_rate": 3.4756331146826036e-05, - "loss": 0.5678, - "step": 103460 - }, - { - "epoch": 0.9147085344507505, - "grad_norm": 17.048847198486328, - "learning_rate": 3.475485775915416e-05, - "loss": 0.5074, - "step": 103470 - }, - { - "epoch": 0.9147969377110627, - "grad_norm": 1.4350841045379639, - "learning_rate": 3.475338437148229e-05, - "loss": 0.6783, - "step": 103480 - }, - { - "epoch": 0.914885340971375, - "grad_norm": 2.285968542098999, - "learning_rate": 3.475191098381042e-05, - "loss": 0.6917, - "step": 103490 - }, - { - "epoch": 0.9149737442316873, - "grad_norm": 5.684361934661865, - "learning_rate": 3.475043759613855e-05, - "loss": 0.6848, - "step": 103500 - }, - { - "epoch": 0.9150621474919995, - "grad_norm": 5.974975109100342, - "learning_rate": 3.474896420846668e-05, - "loss": 0.7555, - "step": 103510 - }, - { - "epoch": 0.9151505507523118, - "grad_norm": 8.389676094055176, - "learning_rate": 3.4747490820794806e-05, - "loss": 0.6622, - "step": 103520 - }, - { - "epoch": 0.915238954012624, - "grad_norm": 7.66300630569458, - "learning_rate": 3.4746017433122934e-05, - "loss": 0.6401, - "step": 103530 - }, - { - "epoch": 0.9153273572729362, - "grad_norm": 1.6489628553390503, - "learning_rate": 3.474454404545107e-05, - "loss": 0.7141, - "step": 103540 - }, - { - "epoch": 0.9154157605332485, - "grad_norm": 5.561105251312256, - "learning_rate": 3.474307065777919e-05, - "loss": 0.5689, - "step": 103550 - }, - { - "epoch": 0.9155041637935607, - "grad_norm": 3.733860731124878, - "learning_rate": 3.4741597270107326e-05, - "loss": 0.7455, - "step": 103560 - }, - { - "epoch": 0.9155925670538729, - "grad_norm": 10.259614944458008, - "learning_rate": 3.4740123882435454e-05, - "loss": 0.6487, - "step": 103570 - }, - { - "epoch": 0.9156809703141852, - "grad_norm": 4.322904109954834, - "learning_rate": 3.473865049476358e-05, - "loss": 0.7696, - "step": 103580 - }, - { - "epoch": 0.9157693735744974, - "grad_norm": 2.1287317276000977, - "learning_rate": 3.473717710709171e-05, - "loss": 0.7048, - "step": 103590 - }, - { - "epoch": 0.9158577768348096, - "grad_norm": 3.1804282665252686, - "learning_rate": 3.4735703719419846e-05, - "loss": 0.5895, - "step": 103600 - }, - { - "epoch": 0.9159461800951219, - "grad_norm": 2.5047860145568848, - "learning_rate": 3.473423033174797e-05, - "loss": 0.5911, - "step": 103610 - }, - { - "epoch": 0.9160345833554342, - "grad_norm": 3.1896352767944336, - "learning_rate": 3.47327569440761e-05, - "loss": 0.5795, - "step": 103620 - }, - { - "epoch": 0.9161229866157464, - "grad_norm": 0.9608718752861023, - "learning_rate": 3.4731283556404224e-05, - "loss": 0.5384, - "step": 103630 - }, - { - "epoch": 0.9162113898760587, - "grad_norm": 1.7372419834136963, - "learning_rate": 3.472981016873236e-05, - "loss": 0.6734, - "step": 103640 - }, - { - "epoch": 0.9162997931363709, - "grad_norm": 1.8831795454025269, - "learning_rate": 3.472833678106049e-05, - "loss": 0.5582, - "step": 103650 - }, - { - "epoch": 0.9163881963966831, - "grad_norm": 3.647759199142456, - "learning_rate": 3.4726863393388616e-05, - "loss": 0.5827, - "step": 103660 - }, - { - "epoch": 0.9164765996569953, - "grad_norm": 3.0250210762023926, - "learning_rate": 3.4725390005716745e-05, - "loss": 0.6757, - "step": 103670 - }, - { - "epoch": 0.9165650029173076, - "grad_norm": 17.237476348876953, - "learning_rate": 3.472391661804488e-05, - "loss": 0.7058, - "step": 103680 - }, - { - "epoch": 0.9166534061776198, - "grad_norm": 3.7470288276672363, - "learning_rate": 3.4722443230373e-05, - "loss": 0.7271, - "step": 103690 - }, - { - "epoch": 0.916741809437932, - "grad_norm": 1.799844741821289, - "learning_rate": 3.4720969842701136e-05, - "loss": 0.806, - "step": 103700 - }, - { - "epoch": 0.9168302126982443, - "grad_norm": 2.8860955238342285, - "learning_rate": 3.4719496455029265e-05, - "loss": 0.6154, - "step": 103710 - }, - { - "epoch": 0.9169186159585565, - "grad_norm": 5.958944797515869, - "learning_rate": 3.471802306735739e-05, - "loss": 0.7982, - "step": 103720 - }, - { - "epoch": 0.9170070192188688, - "grad_norm": 7.22694730758667, - "learning_rate": 3.471654967968552e-05, - "loss": 0.7488, - "step": 103730 - }, - { - "epoch": 0.9170954224791811, - "grad_norm": 8.265280723571777, - "learning_rate": 3.471507629201365e-05, - "loss": 0.7001, - "step": 103740 - }, - { - "epoch": 0.9171838257394933, - "grad_norm": 6.297915935516357, - "learning_rate": 3.471360290434178e-05, - "loss": 0.6241, - "step": 103750 - }, - { - "epoch": 0.9172722289998055, - "grad_norm": 1.8345996141433716, - "learning_rate": 3.471212951666991e-05, - "loss": 0.6995, - "step": 103760 - }, - { - "epoch": 0.9173606322601178, - "grad_norm": 1.3702058792114258, - "learning_rate": 3.4710656128998035e-05, - "loss": 0.6059, - "step": 103770 - }, - { - "epoch": 0.91744903552043, - "grad_norm": 1.0202032327651978, - "learning_rate": 3.470918274132617e-05, - "loss": 0.6367, - "step": 103780 - }, - { - "epoch": 0.9175374387807422, - "grad_norm": 3.577991485595703, - "learning_rate": 3.47077093536543e-05, - "loss": 0.5804, - "step": 103790 - }, - { - "epoch": 0.9176258420410545, - "grad_norm": 1.8480812311172485, - "learning_rate": 3.470623596598243e-05, - "loss": 0.5703, - "step": 103800 - }, - { - "epoch": 0.9177142453013667, - "grad_norm": 2.2506215572357178, - "learning_rate": 3.4704762578310555e-05, - "loss": 0.6877, - "step": 103810 - }, - { - "epoch": 0.9178026485616789, - "grad_norm": 3.1972687244415283, - "learning_rate": 3.470328919063869e-05, - "loss": 0.6196, - "step": 103820 - }, - { - "epoch": 0.9178910518219912, - "grad_norm": 3.169938325881958, - "learning_rate": 3.470181580296681e-05, - "loss": 0.7236, - "step": 103830 - }, - { - "epoch": 0.9179794550823034, - "grad_norm": 2.4742043018341064, - "learning_rate": 3.470034241529495e-05, - "loss": 0.6334, - "step": 103840 - }, - { - "epoch": 0.9180678583426157, - "grad_norm": 2.081242084503174, - "learning_rate": 3.469886902762307e-05, - "loss": 0.5269, - "step": 103850 - }, - { - "epoch": 0.918156261602928, - "grad_norm": 1.9559173583984375, - "learning_rate": 3.4697395639951204e-05, - "loss": 0.7655, - "step": 103860 - }, - { - "epoch": 0.9182446648632402, - "grad_norm": 3.1912856101989746, - "learning_rate": 3.469592225227933e-05, - "loss": 0.6924, - "step": 103870 - }, - { - "epoch": 0.9183330681235524, - "grad_norm": 1.9441215991973877, - "learning_rate": 3.469444886460746e-05, - "loss": 0.7904, - "step": 103880 - }, - { - "epoch": 0.9184214713838647, - "grad_norm": 0.9783027172088623, - "learning_rate": 3.469297547693559e-05, - "loss": 0.7327, - "step": 103890 - }, - { - "epoch": 0.9185098746441769, - "grad_norm": 1.1540257930755615, - "learning_rate": 3.4691502089263724e-05, - "loss": 0.6338, - "step": 103900 - }, - { - "epoch": 0.9185982779044891, - "grad_norm": 3.16699481010437, - "learning_rate": 3.4690028701591845e-05, - "loss": 0.529, - "step": 103910 - }, - { - "epoch": 0.9186866811648013, - "grad_norm": 1.4021426439285278, - "learning_rate": 3.468855531391998e-05, - "loss": 0.5532, - "step": 103920 - }, - { - "epoch": 0.9187750844251136, - "grad_norm": 3.0290215015411377, - "learning_rate": 3.468708192624811e-05, - "loss": 0.481, - "step": 103930 - }, - { - "epoch": 0.9188634876854258, - "grad_norm": 1.3256397247314453, - "learning_rate": 3.468560853857624e-05, - "loss": 0.6867, - "step": 103940 - }, - { - "epoch": 0.918951890945738, - "grad_norm": 1.932599425315857, - "learning_rate": 3.4684135150904366e-05, - "loss": 0.6652, - "step": 103950 - }, - { - "epoch": 0.9190402942060503, - "grad_norm": 2.8910319805145264, - "learning_rate": 3.46826617632325e-05, - "loss": 0.7193, - "step": 103960 - }, - { - "epoch": 0.9191286974663626, - "grad_norm": 1.3356348276138306, - "learning_rate": 3.468118837556062e-05, - "loss": 0.6138, - "step": 103970 - }, - { - "epoch": 0.9192171007266748, - "grad_norm": 1.0583598613739014, - "learning_rate": 3.467971498788876e-05, - "loss": 0.5298, - "step": 103980 - }, - { - "epoch": 0.9193055039869871, - "grad_norm": 3.0338141918182373, - "learning_rate": 3.467824160021688e-05, - "loss": 0.6652, - "step": 103990 - }, - { - "epoch": 0.9193939072472993, - "grad_norm": 0.8351437449455261, - "learning_rate": 3.4676768212545014e-05, - "loss": 0.5462, - "step": 104000 - }, - { - "epoch": 0.9194823105076115, - "grad_norm": 4.577482223510742, - "learning_rate": 3.467529482487314e-05, - "loss": 0.7125, - "step": 104010 - }, - { - "epoch": 0.9195707137679238, - "grad_norm": 7.595093727111816, - "learning_rate": 3.467382143720127e-05, - "loss": 0.8144, - "step": 104020 - }, - { - "epoch": 0.919659117028236, - "grad_norm": 2.464247703552246, - "learning_rate": 3.46723480495294e-05, - "loss": 0.761, - "step": 104030 - }, - { - "epoch": 0.9197475202885482, - "grad_norm": 3.4661977291107178, - "learning_rate": 3.4670874661857534e-05, - "loss": 0.7069, - "step": 104040 - }, - { - "epoch": 0.9198359235488605, - "grad_norm": 3.049800157546997, - "learning_rate": 3.4669401274185656e-05, - "loss": 0.7475, - "step": 104050 - }, - { - "epoch": 0.9199243268091727, - "grad_norm": 4.5359416007995605, - "learning_rate": 3.466792788651379e-05, - "loss": 0.6148, - "step": 104060 - }, - { - "epoch": 0.9200127300694849, - "grad_norm": 5.870856761932373, - "learning_rate": 3.466645449884192e-05, - "loss": 0.5147, - "step": 104070 - }, - { - "epoch": 0.9201011333297971, - "grad_norm": 5.160520553588867, - "learning_rate": 3.466498111117005e-05, - "loss": 0.663, - "step": 104080 - }, - { - "epoch": 0.9201895365901095, - "grad_norm": 3.9021685123443604, - "learning_rate": 3.4663507723498176e-05, - "loss": 0.5629, - "step": 104090 - }, - { - "epoch": 0.9202779398504217, - "grad_norm": 0.8227945566177368, - "learning_rate": 3.4662034335826304e-05, - "loss": 0.5951, - "step": 104100 - }, - { - "epoch": 0.920366343110734, - "grad_norm": 5.3351335525512695, - "learning_rate": 3.466056094815443e-05, - "loss": 0.5594, - "step": 104110 - }, - { - "epoch": 0.9204547463710462, - "grad_norm": 2.647221326828003, - "learning_rate": 3.465908756048257e-05, - "loss": 0.6732, - "step": 104120 - }, - { - "epoch": 0.9205431496313584, - "grad_norm": 3.9328064918518066, - "learning_rate": 3.4657614172810696e-05, - "loss": 0.717, - "step": 104130 - }, - { - "epoch": 0.9206315528916706, - "grad_norm": 3.648874044418335, - "learning_rate": 3.4656140785138825e-05, - "loss": 0.6789, - "step": 104140 - }, - { - "epoch": 0.9207199561519829, - "grad_norm": 4.600849628448486, - "learning_rate": 3.465466739746695e-05, - "loss": 0.6565, - "step": 104150 - }, - { - "epoch": 0.9208083594122951, - "grad_norm": 0.8460045456886292, - "learning_rate": 3.465319400979508e-05, - "loss": 0.6734, - "step": 104160 - }, - { - "epoch": 0.9208967626726073, - "grad_norm": 3.087242841720581, - "learning_rate": 3.465172062212321e-05, - "loss": 0.7499, - "step": 104170 - }, - { - "epoch": 0.9209851659329196, - "grad_norm": 6.493686676025391, - "learning_rate": 3.4650247234451345e-05, - "loss": 0.6004, - "step": 104180 - }, - { - "epoch": 0.9210735691932318, - "grad_norm": 3.5971786975860596, - "learning_rate": 3.464877384677947e-05, - "loss": 0.7482, - "step": 104190 - }, - { - "epoch": 0.921161972453544, - "grad_norm": 5.517910957336426, - "learning_rate": 3.46473004591076e-05, - "loss": 0.6536, - "step": 104200 - }, - { - "epoch": 0.9212503757138564, - "grad_norm": 2.8508706092834473, - "learning_rate": 3.464582707143573e-05, - "loss": 0.5906, - "step": 104210 - }, - { - "epoch": 0.9213387789741686, - "grad_norm": 1.2400574684143066, - "learning_rate": 3.464435368376386e-05, - "loss": 0.5976, - "step": 104220 - }, - { - "epoch": 0.9214271822344808, - "grad_norm": 3.411325216293335, - "learning_rate": 3.4642880296091987e-05, - "loss": 0.6162, - "step": 104230 - }, - { - "epoch": 0.9215155854947931, - "grad_norm": 2.050952911376953, - "learning_rate": 3.4641406908420115e-05, - "loss": 0.6717, - "step": 104240 - }, - { - "epoch": 0.9216039887551053, - "grad_norm": 1.7029166221618652, - "learning_rate": 3.463993352074825e-05, - "loss": 0.7187, - "step": 104250 - }, - { - "epoch": 0.9216923920154175, - "grad_norm": 4.325075626373291, - "learning_rate": 3.463846013307638e-05, - "loss": 0.7069, - "step": 104260 - }, - { - "epoch": 0.9217807952757298, - "grad_norm": 2.1368467807769775, - "learning_rate": 3.463698674540451e-05, - "loss": 0.6641, - "step": 104270 - }, - { - "epoch": 0.921869198536042, - "grad_norm": 2.4817216396331787, - "learning_rate": 3.4635513357732635e-05, - "loss": 0.5852, - "step": 104280 - }, - { - "epoch": 0.9219576017963542, - "grad_norm": 14.611276626586914, - "learning_rate": 3.4634039970060763e-05, - "loss": 0.633, - "step": 104290 - }, - { - "epoch": 0.9220460050566665, - "grad_norm": 2.314023733139038, - "learning_rate": 3.463256658238889e-05, - "loss": 0.6533, - "step": 104300 - }, - { - "epoch": 0.9221344083169787, - "grad_norm": 4.543471336364746, - "learning_rate": 3.463109319471703e-05, - "loss": 0.6756, - "step": 104310 - }, - { - "epoch": 0.922222811577291, - "grad_norm": 2.733240842819214, - "learning_rate": 3.462961980704515e-05, - "loss": 0.7268, - "step": 104320 - }, - { - "epoch": 0.9223112148376033, - "grad_norm": 3.4160687923431396, - "learning_rate": 3.4628146419373284e-05, - "loss": 0.6346, - "step": 104330 - }, - { - "epoch": 0.9223996180979155, - "grad_norm": 4.000524997711182, - "learning_rate": 3.462667303170141e-05, - "loss": 0.5833, - "step": 104340 - }, - { - "epoch": 0.9224880213582277, - "grad_norm": 3.380833387374878, - "learning_rate": 3.462519964402954e-05, - "loss": 0.6004, - "step": 104350 - }, - { - "epoch": 0.92257642461854, - "grad_norm": 7.3308820724487305, - "learning_rate": 3.462372625635767e-05, - "loss": 0.6014, - "step": 104360 - }, - { - "epoch": 0.9226648278788522, - "grad_norm": 2.7283005714416504, - "learning_rate": 3.4622252868685804e-05, - "loss": 0.5989, - "step": 104370 - }, - { - "epoch": 0.9227532311391644, - "grad_norm": 2.2377896308898926, - "learning_rate": 3.4620779481013925e-05, - "loss": 0.6806, - "step": 104380 - }, - { - "epoch": 0.9228416343994766, - "grad_norm": 2.443495988845825, - "learning_rate": 3.461930609334206e-05, - "loss": 0.664, - "step": 104390 - }, - { - "epoch": 0.9229300376597889, - "grad_norm": 2.7558369636535645, - "learning_rate": 3.461783270567019e-05, - "loss": 0.7429, - "step": 104400 - }, - { - "epoch": 0.9230184409201011, - "grad_norm": 1.415323257446289, - "learning_rate": 3.461635931799832e-05, - "loss": 0.7924, - "step": 104410 - }, - { - "epoch": 0.9231068441804133, - "grad_norm": 3.840822696685791, - "learning_rate": 3.4614885930326446e-05, - "loss": 0.6435, - "step": 104420 - }, - { - "epoch": 0.9231952474407256, - "grad_norm": 2.3900983333587646, - "learning_rate": 3.461341254265458e-05, - "loss": 0.7044, - "step": 104430 - }, - { - "epoch": 0.9232836507010379, - "grad_norm": 2.2705886363983154, - "learning_rate": 3.46119391549827e-05, - "loss": 0.6203, - "step": 104440 - }, - { - "epoch": 0.9233720539613501, - "grad_norm": 1.9621963500976562, - "learning_rate": 3.461046576731084e-05, - "loss": 0.8105, - "step": 104450 - }, - { - "epoch": 0.9234604572216624, - "grad_norm": 1.57456374168396, - "learning_rate": 3.460899237963896e-05, - "loss": 0.6263, - "step": 104460 - }, - { - "epoch": 0.9235488604819746, - "grad_norm": 1.8862709999084473, - "learning_rate": 3.4607518991967094e-05, - "loss": 0.6521, - "step": 104470 - }, - { - "epoch": 0.9236372637422868, - "grad_norm": 2.4278106689453125, - "learning_rate": 3.460604560429522e-05, - "loss": 0.7634, - "step": 104480 - }, - { - "epoch": 0.9237256670025991, - "grad_norm": 1.6628830432891846, - "learning_rate": 3.460457221662335e-05, - "loss": 0.6415, - "step": 104490 - }, - { - "epoch": 0.9238140702629113, - "grad_norm": 3.358079671859741, - "learning_rate": 3.460309882895148e-05, - "loss": 0.6356, - "step": 104500 - }, - { - "epoch": 0.9239024735232235, - "grad_norm": 1.7938374280929565, - "learning_rate": 3.4601625441279614e-05, - "loss": 0.7198, - "step": 104510 - }, - { - "epoch": 0.9239908767835358, - "grad_norm": 4.427228927612305, - "learning_rate": 3.4600152053607736e-05, - "loss": 0.6924, - "step": 104520 - }, - { - "epoch": 0.924079280043848, - "grad_norm": 8.362548828125, - "learning_rate": 3.459867866593587e-05, - "loss": 0.6429, - "step": 104530 - }, - { - "epoch": 0.9241676833041602, - "grad_norm": 1.2016392946243286, - "learning_rate": 3.4597205278264e-05, - "loss": 0.6864, - "step": 104540 - }, - { - "epoch": 0.9242560865644724, - "grad_norm": 6.469966888427734, - "learning_rate": 3.459573189059213e-05, - "loss": 0.5586, - "step": 104550 - }, - { - "epoch": 0.9243444898247848, - "grad_norm": 6.511034965515137, - "learning_rate": 3.4594258502920256e-05, - "loss": 0.6893, - "step": 104560 - }, - { - "epoch": 0.924432893085097, - "grad_norm": 1.8520325422286987, - "learning_rate": 3.4592785115248384e-05, - "loss": 0.6149, - "step": 104570 - }, - { - "epoch": 0.9245212963454092, - "grad_norm": 2.806547164916992, - "learning_rate": 3.459131172757651e-05, - "loss": 0.7918, - "step": 104580 - }, - { - "epoch": 0.9246096996057215, - "grad_norm": 1.0989857912063599, - "learning_rate": 3.458983833990465e-05, - "loss": 0.7532, - "step": 104590 - }, - { - "epoch": 0.9246981028660337, - "grad_norm": 4.717621326446533, - "learning_rate": 3.458836495223277e-05, - "loss": 0.7444, - "step": 104600 - }, - { - "epoch": 0.9247865061263459, - "grad_norm": 1.115357518196106, - "learning_rate": 3.4586891564560905e-05, - "loss": 0.6074, - "step": 104610 - }, - { - "epoch": 0.9248749093866582, - "grad_norm": 0.9762543439865112, - "learning_rate": 3.458541817688903e-05, - "loss": 0.6751, - "step": 104620 - }, - { - "epoch": 0.9249633126469704, - "grad_norm": 3.491162061691284, - "learning_rate": 3.458394478921716e-05, - "loss": 0.7316, - "step": 104630 - }, - { - "epoch": 0.9250517159072826, - "grad_norm": 3.6603012084960938, - "learning_rate": 3.458247140154529e-05, - "loss": 0.8362, - "step": 104640 - }, - { - "epoch": 0.9251401191675949, - "grad_norm": 1.6165461540222168, - "learning_rate": 3.4580998013873425e-05, - "loss": 0.6194, - "step": 104650 - }, - { - "epoch": 0.9252285224279071, - "grad_norm": 2.929722547531128, - "learning_rate": 3.4579524626201546e-05, - "loss": 0.6733, - "step": 104660 - }, - { - "epoch": 0.9253169256882193, - "grad_norm": 5.379761695861816, - "learning_rate": 3.457805123852968e-05, - "loss": 0.7932, - "step": 104670 - }, - { - "epoch": 0.9254053289485317, - "grad_norm": 1.9697751998901367, - "learning_rate": 3.45765778508578e-05, - "loss": 0.6643, - "step": 104680 - }, - { - "epoch": 0.9254937322088439, - "grad_norm": 3.5992343425750732, - "learning_rate": 3.457510446318594e-05, - "loss": 0.7967, - "step": 104690 - }, - { - "epoch": 0.9255821354691561, - "grad_norm": 2.030086040496826, - "learning_rate": 3.457363107551407e-05, - "loss": 0.7189, - "step": 104700 - }, - { - "epoch": 0.9256705387294684, - "grad_norm": 28.58447265625, - "learning_rate": 3.4572157687842195e-05, - "loss": 0.6581, - "step": 104710 - }, - { - "epoch": 0.9257589419897806, - "grad_norm": 2.21335768699646, - "learning_rate": 3.457068430017032e-05, - "loss": 0.641, - "step": 104720 - }, - { - "epoch": 0.9258473452500928, - "grad_norm": 1.9996507167816162, - "learning_rate": 3.456921091249846e-05, - "loss": 0.6598, - "step": 104730 - }, - { - "epoch": 0.925935748510405, - "grad_norm": 1.8208813667297363, - "learning_rate": 3.456773752482658e-05, - "loss": 0.6008, - "step": 104740 - }, - { - "epoch": 0.9260241517707173, - "grad_norm": 1.8049731254577637, - "learning_rate": 3.4566264137154715e-05, - "loss": 0.6254, - "step": 104750 - }, - { - "epoch": 0.9261125550310295, - "grad_norm": 3.426121473312378, - "learning_rate": 3.4564790749482844e-05, - "loss": 0.6932, - "step": 104760 - }, - { - "epoch": 0.9262009582913417, - "grad_norm": 1.122986078262329, - "learning_rate": 3.456331736181097e-05, - "loss": 0.5211, - "step": 104770 - }, - { - "epoch": 0.926289361551654, - "grad_norm": 7.839140892028809, - "learning_rate": 3.45618439741391e-05, - "loss": 0.6795, - "step": 104780 - }, - { - "epoch": 0.9263777648119663, - "grad_norm": 2.6903767585754395, - "learning_rate": 3.456037058646723e-05, - "loss": 0.5912, - "step": 104790 - }, - { - "epoch": 0.9264661680722786, - "grad_norm": 2.958411693572998, - "learning_rate": 3.455889719879536e-05, - "loss": 0.7027, - "step": 104800 - }, - { - "epoch": 0.9265545713325908, - "grad_norm": 3.4592723846435547, - "learning_rate": 3.455742381112349e-05, - "loss": 0.6906, - "step": 104810 - }, - { - "epoch": 0.926642974592903, - "grad_norm": 5.114811897277832, - "learning_rate": 3.4555950423451614e-05, - "loss": 0.6953, - "step": 104820 - }, - { - "epoch": 0.9267313778532152, - "grad_norm": 2.9148213863372803, - "learning_rate": 3.455447703577975e-05, - "loss": 0.5997, - "step": 104830 - }, - { - "epoch": 0.9268197811135275, - "grad_norm": 2.3010993003845215, - "learning_rate": 3.455300364810788e-05, - "loss": 0.6917, - "step": 104840 - }, - { - "epoch": 0.9269081843738397, - "grad_norm": 2.3580894470214844, - "learning_rate": 3.4551530260436006e-05, - "loss": 0.6295, - "step": 104850 - }, - { - "epoch": 0.9269965876341519, - "grad_norm": 2.740551471710205, - "learning_rate": 3.4550056872764134e-05, - "loss": 0.6211, - "step": 104860 - }, - { - "epoch": 0.9270849908944642, - "grad_norm": 3.4254167079925537, - "learning_rate": 3.454858348509227e-05, - "loss": 0.5244, - "step": 104870 - }, - { - "epoch": 0.9271733941547764, - "grad_norm": 1.171040654182434, - "learning_rate": 3.454711009742039e-05, - "loss": 0.7817, - "step": 104880 - }, - { - "epoch": 0.9272617974150886, - "grad_norm": 5.224419593811035, - "learning_rate": 3.4545636709748526e-05, - "loss": 0.6389, - "step": 104890 - }, - { - "epoch": 0.9273502006754009, - "grad_norm": 4.985189437866211, - "learning_rate": 3.4544163322076654e-05, - "loss": 0.6284, - "step": 104900 - }, - { - "epoch": 0.9274386039357132, - "grad_norm": 4.505568504333496, - "learning_rate": 3.454268993440478e-05, - "loss": 0.6461, - "step": 104910 - }, - { - "epoch": 0.9275270071960254, - "grad_norm": 2.198920488357544, - "learning_rate": 3.454121654673291e-05, - "loss": 0.7069, - "step": 104920 - }, - { - "epoch": 0.9276154104563377, - "grad_norm": 2.271940231323242, - "learning_rate": 3.453974315906104e-05, - "loss": 0.593, - "step": 104930 - }, - { - "epoch": 0.9277038137166499, - "grad_norm": 0.9303901791572571, - "learning_rate": 3.453826977138917e-05, - "loss": 0.6362, - "step": 104940 - }, - { - "epoch": 0.9277922169769621, - "grad_norm": 3.752889394760132, - "learning_rate": 3.45367963837173e-05, - "loss": 0.6494, - "step": 104950 - }, - { - "epoch": 0.9278806202372744, - "grad_norm": 9.621313095092773, - "learning_rate": 3.4535322996045424e-05, - "loss": 0.6087, - "step": 104960 - }, - { - "epoch": 0.9279690234975866, - "grad_norm": 10.74526309967041, - "learning_rate": 3.453384960837356e-05, - "loss": 0.5853, - "step": 104970 - }, - { - "epoch": 0.9280574267578988, - "grad_norm": 8.03128433227539, - "learning_rate": 3.453237622070169e-05, - "loss": 0.6375, - "step": 104980 - }, - { - "epoch": 0.928145830018211, - "grad_norm": 2.8632171154022217, - "learning_rate": 3.4530902833029816e-05, - "loss": 0.5752, - "step": 104990 - }, - { - "epoch": 0.9282342332785233, - "grad_norm": 1.5016505718231201, - "learning_rate": 3.4529429445357944e-05, - "loss": 0.6637, - "step": 105000 - }, - { - "epoch": 0.9283226365388355, - "grad_norm": 3.3937366008758545, - "learning_rate": 3.452795605768608e-05, - "loss": 0.6431, - "step": 105010 - }, - { - "epoch": 0.9284110397991477, - "grad_norm": 8.214386940002441, - "learning_rate": 3.45264826700142e-05, - "loss": 0.7078, - "step": 105020 - }, - { - "epoch": 0.9284994430594601, - "grad_norm": 4.080697536468506, - "learning_rate": 3.4525009282342336e-05, - "loss": 0.5912, - "step": 105030 - }, - { - "epoch": 0.9285878463197723, - "grad_norm": 7.324512958526611, - "learning_rate": 3.4523535894670465e-05, - "loss": 0.7108, - "step": 105040 - }, - { - "epoch": 0.9286762495800845, - "grad_norm": 1.087760090827942, - "learning_rate": 3.452206250699859e-05, - "loss": 0.5203, - "step": 105050 - }, - { - "epoch": 0.9287646528403968, - "grad_norm": 3.406646490097046, - "learning_rate": 3.452058911932672e-05, - "loss": 0.6115, - "step": 105060 - }, - { - "epoch": 0.928853056100709, - "grad_norm": 10.423240661621094, - "learning_rate": 3.451911573165485e-05, - "loss": 0.5134, - "step": 105070 - }, - { - "epoch": 0.9289414593610212, - "grad_norm": 2.9946515560150146, - "learning_rate": 3.451764234398298e-05, - "loss": 0.5499, - "step": 105080 - }, - { - "epoch": 0.9290298626213335, - "grad_norm": 4.508096694946289, - "learning_rate": 3.451616895631111e-05, - "loss": 0.609, - "step": 105090 - }, - { - "epoch": 0.9291182658816457, - "grad_norm": 7.4385271072387695, - "learning_rate": 3.451469556863924e-05, - "loss": 0.8378, - "step": 105100 - }, - { - "epoch": 0.9292066691419579, - "grad_norm": 2.4653961658477783, - "learning_rate": 3.451322218096737e-05, - "loss": 0.6597, - "step": 105110 - }, - { - "epoch": 0.9292950724022702, - "grad_norm": 1.977466106414795, - "learning_rate": 3.45117487932955e-05, - "loss": 0.7119, - "step": 105120 - }, - { - "epoch": 0.9293834756625824, - "grad_norm": 2.236581325531006, - "learning_rate": 3.4510275405623627e-05, - "loss": 0.6094, - "step": 105130 - }, - { - "epoch": 0.9294718789228946, - "grad_norm": 1.236483097076416, - "learning_rate": 3.4508802017951755e-05, - "loss": 0.6374, - "step": 105140 - }, - { - "epoch": 0.929560282183207, - "grad_norm": 4.176371097564697, - "learning_rate": 3.450732863027988e-05, - "loss": 0.5527, - "step": 105150 - }, - { - "epoch": 0.9296486854435192, - "grad_norm": 1.9775093793869019, - "learning_rate": 3.450585524260802e-05, - "loss": 0.6298, - "step": 105160 - }, - { - "epoch": 0.9297370887038314, - "grad_norm": 4.56240177154541, - "learning_rate": 3.450438185493615e-05, - "loss": 0.6215, - "step": 105170 - }, - { - "epoch": 0.9298254919641437, - "grad_norm": 3.758308172225952, - "learning_rate": 3.4502908467264275e-05, - "loss": 0.7157, - "step": 105180 - }, - { - "epoch": 0.9299138952244559, - "grad_norm": 5.263377666473389, - "learning_rate": 3.4501435079592403e-05, - "loss": 0.8083, - "step": 105190 - }, - { - "epoch": 0.9300022984847681, - "grad_norm": 4.500478267669678, - "learning_rate": 3.449996169192053e-05, - "loss": 0.6416, - "step": 105200 - }, - { - "epoch": 0.9300907017450804, - "grad_norm": 1.5850861072540283, - "learning_rate": 3.449848830424866e-05, - "loss": 0.5494, - "step": 105210 - }, - { - "epoch": 0.9301791050053926, - "grad_norm": 2.821803331375122, - "learning_rate": 3.4497014916576795e-05, - "loss": 0.5608, - "step": 105220 - }, - { - "epoch": 0.9302675082657048, - "grad_norm": 1.5779587030410767, - "learning_rate": 3.4495541528904924e-05, - "loss": 0.6611, - "step": 105230 - }, - { - "epoch": 0.930355911526017, - "grad_norm": 5.891751766204834, - "learning_rate": 3.449406814123305e-05, - "loss": 0.6683, - "step": 105240 - }, - { - "epoch": 0.9304443147863293, - "grad_norm": 2.789013624191284, - "learning_rate": 3.449259475356118e-05, - "loss": 0.5944, - "step": 105250 - }, - { - "epoch": 0.9305327180466415, - "grad_norm": 3.7947304248809814, - "learning_rate": 3.449112136588931e-05, - "loss": 0.5688, - "step": 105260 - }, - { - "epoch": 0.9306211213069538, - "grad_norm": 4.604398727416992, - "learning_rate": 3.448964797821744e-05, - "loss": 0.6049, - "step": 105270 - }, - { - "epoch": 0.9307095245672661, - "grad_norm": 1.2592705488204956, - "learning_rate": 3.448817459054557e-05, - "loss": 0.6481, - "step": 105280 - }, - { - "epoch": 0.9307979278275783, - "grad_norm": 1.717741847038269, - "learning_rate": 3.4486701202873694e-05, - "loss": 0.6046, - "step": 105290 - }, - { - "epoch": 0.9308863310878905, - "grad_norm": 0.8107909560203552, - "learning_rate": 3.448522781520183e-05, - "loss": 0.6765, - "step": 105300 - }, - { - "epoch": 0.9309747343482028, - "grad_norm": 9.534072875976562, - "learning_rate": 3.448375442752996e-05, - "loss": 0.6768, - "step": 105310 - }, - { - "epoch": 0.931063137608515, - "grad_norm": 2.568861961364746, - "learning_rate": 3.4482281039858086e-05, - "loss": 0.628, - "step": 105320 - }, - { - "epoch": 0.9311515408688272, - "grad_norm": 11.862177848815918, - "learning_rate": 3.4480807652186214e-05, - "loss": 0.6139, - "step": 105330 - }, - { - "epoch": 0.9312399441291395, - "grad_norm": 1.8706004619598389, - "learning_rate": 3.447933426451435e-05, - "loss": 0.5692, - "step": 105340 - }, - { - "epoch": 0.9313283473894517, - "grad_norm": 2.5620651245117188, - "learning_rate": 3.447786087684247e-05, - "loss": 0.5955, - "step": 105350 - }, - { - "epoch": 0.9314167506497639, - "grad_norm": 1.167807936668396, - "learning_rate": 3.4476387489170606e-05, - "loss": 0.6981, - "step": 105360 - }, - { - "epoch": 0.9315051539100762, - "grad_norm": 2.324995517730713, - "learning_rate": 3.4474914101498734e-05, - "loss": 0.6546, - "step": 105370 - }, - { - "epoch": 0.9315935571703885, - "grad_norm": 2.697660207748413, - "learning_rate": 3.447344071382686e-05, - "loss": 0.5719, - "step": 105380 - }, - { - "epoch": 0.9316819604307007, - "grad_norm": 9.180237770080566, - "learning_rate": 3.447196732615499e-05, - "loss": 0.7306, - "step": 105390 - }, - { - "epoch": 0.931770363691013, - "grad_norm": 2.1576290130615234, - "learning_rate": 3.447049393848312e-05, - "loss": 0.7309, - "step": 105400 - }, - { - "epoch": 0.9318587669513252, - "grad_norm": 2.53000545501709, - "learning_rate": 3.446902055081125e-05, - "loss": 0.5871, - "step": 105410 - }, - { - "epoch": 0.9319471702116374, - "grad_norm": 2.2341485023498535, - "learning_rate": 3.446754716313938e-05, - "loss": 0.8134, - "step": 105420 - }, - { - "epoch": 0.9320355734719497, - "grad_norm": 12.517487525939941, - "learning_rate": 3.4466073775467504e-05, - "loss": 0.6647, - "step": 105430 - }, - { - "epoch": 0.9321239767322619, - "grad_norm": 3.3855173587799072, - "learning_rate": 3.446460038779564e-05, - "loss": 0.6319, - "step": 105440 - }, - { - "epoch": 0.9322123799925741, - "grad_norm": 2.9722278118133545, - "learning_rate": 3.446312700012377e-05, - "loss": 0.6604, - "step": 105450 - }, - { - "epoch": 0.9323007832528863, - "grad_norm": 2.611067533493042, - "learning_rate": 3.4461653612451896e-05, - "loss": 0.7337, - "step": 105460 - }, - { - "epoch": 0.9323891865131986, - "grad_norm": 1.7677834033966064, - "learning_rate": 3.4460180224780024e-05, - "loss": 0.6748, - "step": 105470 - }, - { - "epoch": 0.9324775897735108, - "grad_norm": 2.7088329792022705, - "learning_rate": 3.445870683710816e-05, - "loss": 0.4969, - "step": 105480 - }, - { - "epoch": 0.932565993033823, - "grad_norm": 1.6849554777145386, - "learning_rate": 3.445723344943628e-05, - "loss": 0.5471, - "step": 105490 - }, - { - "epoch": 0.9326543962941354, - "grad_norm": 3.012347936630249, - "learning_rate": 3.4455760061764416e-05, - "loss": 0.6974, - "step": 105500 - }, - { - "epoch": 0.9327427995544476, - "grad_norm": 1.863147497177124, - "learning_rate": 3.445428667409254e-05, - "loss": 0.686, - "step": 105510 - }, - { - "epoch": 0.9328312028147598, - "grad_norm": 2.776118755340576, - "learning_rate": 3.445281328642067e-05, - "loss": 0.6664, - "step": 105520 - }, - { - "epoch": 0.9329196060750721, - "grad_norm": 9.31080436706543, - "learning_rate": 3.44513398987488e-05, - "loss": 0.5442, - "step": 105530 - }, - { - "epoch": 0.9330080093353843, - "grad_norm": 1.9648231267929077, - "learning_rate": 3.444986651107693e-05, - "loss": 0.7315, - "step": 105540 - }, - { - "epoch": 0.9330964125956965, - "grad_norm": 6.062094688415527, - "learning_rate": 3.444839312340506e-05, - "loss": 0.6707, - "step": 105550 - }, - { - "epoch": 0.9331848158560088, - "grad_norm": 2.9668476581573486, - "learning_rate": 3.444691973573319e-05, - "loss": 0.5714, - "step": 105560 - }, - { - "epoch": 0.933273219116321, - "grad_norm": 5.547146320343018, - "learning_rate": 3.4445446348061315e-05, - "loss": 0.6385, - "step": 105570 - }, - { - "epoch": 0.9333616223766332, - "grad_norm": 4.097261428833008, - "learning_rate": 3.444397296038945e-05, - "loss": 0.7184, - "step": 105580 - }, - { - "epoch": 0.9334500256369455, - "grad_norm": 4.441718101501465, - "learning_rate": 3.444249957271758e-05, - "loss": 0.6514, - "step": 105590 - }, - { - "epoch": 0.9335384288972577, - "grad_norm": 1.865618348121643, - "learning_rate": 3.4441026185045707e-05, - "loss": 0.6407, - "step": 105600 - }, - { - "epoch": 0.9336268321575699, - "grad_norm": 3.6917524337768555, - "learning_rate": 3.4439552797373835e-05, - "loss": 0.68, - "step": 105610 - }, - { - "epoch": 0.9337152354178823, - "grad_norm": 2.2410688400268555, - "learning_rate": 3.443807940970196e-05, - "loss": 0.6834, - "step": 105620 - }, - { - "epoch": 0.9338036386781945, - "grad_norm": 3.7255070209503174, - "learning_rate": 3.443660602203009e-05, - "loss": 0.6979, - "step": 105630 - }, - { - "epoch": 0.9338920419385067, - "grad_norm": 2.3326375484466553, - "learning_rate": 3.443513263435823e-05, - "loss": 0.5148, - "step": 105640 - }, - { - "epoch": 0.933980445198819, - "grad_norm": 12.944406509399414, - "learning_rate": 3.443365924668635e-05, - "loss": 0.6967, - "step": 105650 - }, - { - "epoch": 0.9340688484591312, - "grad_norm": 1.932525634765625, - "learning_rate": 3.4432185859014483e-05, - "loss": 0.5949, - "step": 105660 - }, - { - "epoch": 0.9341572517194434, - "grad_norm": 1.6108492612838745, - "learning_rate": 3.443071247134261e-05, - "loss": 0.6319, - "step": 105670 - }, - { - "epoch": 0.9342456549797556, - "grad_norm": 2.996105909347534, - "learning_rate": 3.442923908367074e-05, - "loss": 0.6132, - "step": 105680 - }, - { - "epoch": 0.9343340582400679, - "grad_norm": 3.7411937713623047, - "learning_rate": 3.442776569599887e-05, - "loss": 0.5928, - "step": 105690 - }, - { - "epoch": 0.9344224615003801, - "grad_norm": 3.852276563644409, - "learning_rate": 3.4426292308327004e-05, - "loss": 0.6142, - "step": 105700 - }, - { - "epoch": 0.9345108647606923, - "grad_norm": 2.47209095954895, - "learning_rate": 3.4424818920655125e-05, - "loss": 0.7048, - "step": 105710 - }, - { - "epoch": 0.9345992680210046, - "grad_norm": 6.194769859313965, - "learning_rate": 3.442334553298326e-05, - "loss": 0.5949, - "step": 105720 - }, - { - "epoch": 0.9346876712813168, - "grad_norm": 2.719789981842041, - "learning_rate": 3.442187214531139e-05, - "loss": 0.6168, - "step": 105730 - }, - { - "epoch": 0.9347760745416291, - "grad_norm": 5.138671398162842, - "learning_rate": 3.442039875763952e-05, - "loss": 0.6338, - "step": 105740 - }, - { - "epoch": 0.9348644778019414, - "grad_norm": 1.496875286102295, - "learning_rate": 3.4418925369967645e-05, - "loss": 0.668, - "step": 105750 - }, - { - "epoch": 0.9349528810622536, - "grad_norm": 1.9950506687164307, - "learning_rate": 3.4417451982295774e-05, - "loss": 0.7026, - "step": 105760 - }, - { - "epoch": 0.9350412843225658, - "grad_norm": 1.2310317754745483, - "learning_rate": 3.44159785946239e-05, - "loss": 0.6841, - "step": 105770 - }, - { - "epoch": 0.9351296875828781, - "grad_norm": 14.722799301147461, - "learning_rate": 3.441450520695204e-05, - "loss": 0.7115, - "step": 105780 - }, - { - "epoch": 0.9352180908431903, - "grad_norm": 2.9561312198638916, - "learning_rate": 3.441303181928016e-05, - "loss": 0.7107, - "step": 105790 - }, - { - "epoch": 0.9353064941035025, - "grad_norm": 5.269919395446777, - "learning_rate": 3.4411558431608294e-05, - "loss": 0.556, - "step": 105800 - }, - { - "epoch": 0.9353948973638148, - "grad_norm": 9.491625785827637, - "learning_rate": 3.441008504393642e-05, - "loss": 0.7069, - "step": 105810 - }, - { - "epoch": 0.935483300624127, - "grad_norm": 2.8752474784851074, - "learning_rate": 3.440861165626455e-05, - "loss": 0.605, - "step": 105820 - }, - { - "epoch": 0.9355717038844392, - "grad_norm": 6.255155086517334, - "learning_rate": 3.440713826859268e-05, - "loss": 0.7178, - "step": 105830 - }, - { - "epoch": 0.9356601071447515, - "grad_norm": 2.972198724746704, - "learning_rate": 3.4405664880920814e-05, - "loss": 0.6154, - "step": 105840 - }, - { - "epoch": 0.9357485104050638, - "grad_norm": 1.6700375080108643, - "learning_rate": 3.4404191493248936e-05, - "loss": 0.5742, - "step": 105850 - }, - { - "epoch": 0.935836913665376, - "grad_norm": 1.5107364654541016, - "learning_rate": 3.440271810557707e-05, - "loss": 0.5957, - "step": 105860 - }, - { - "epoch": 0.9359253169256883, - "grad_norm": 1.121809482574463, - "learning_rate": 3.440124471790519e-05, - "loss": 0.6361, - "step": 105870 - }, - { - "epoch": 0.9360137201860005, - "grad_norm": 1.8884894847869873, - "learning_rate": 3.439977133023333e-05, - "loss": 0.6003, - "step": 105880 - }, - { - "epoch": 0.9361021234463127, - "grad_norm": 1.7910462617874146, - "learning_rate": 3.4398297942561456e-05, - "loss": 0.5779, - "step": 105890 - }, - { - "epoch": 0.936190526706625, - "grad_norm": 7.440021514892578, - "learning_rate": 3.4396824554889584e-05, - "loss": 0.6207, - "step": 105900 - }, - { - "epoch": 0.9362789299669372, - "grad_norm": 11.046977996826172, - "learning_rate": 3.439535116721771e-05, - "loss": 0.6097, - "step": 105910 - }, - { - "epoch": 0.9363673332272494, - "grad_norm": 1.3227531909942627, - "learning_rate": 3.439387777954585e-05, - "loss": 0.608, - "step": 105920 - }, - { - "epoch": 0.9364557364875616, - "grad_norm": 1.1968457698822021, - "learning_rate": 3.439240439187397e-05, - "loss": 0.5547, - "step": 105930 - }, - { - "epoch": 0.9365441397478739, - "grad_norm": 2.193240165710449, - "learning_rate": 3.4390931004202105e-05, - "loss": 0.5987, - "step": 105940 - }, - { - "epoch": 0.9366325430081861, - "grad_norm": 3.277909517288208, - "learning_rate": 3.438945761653023e-05, - "loss": 0.6261, - "step": 105950 - }, - { - "epoch": 0.9367209462684983, - "grad_norm": 1.5054796934127808, - "learning_rate": 3.438798422885836e-05, - "loss": 0.6798, - "step": 105960 - }, - { - "epoch": 0.9368093495288107, - "grad_norm": 1.54038667678833, - "learning_rate": 3.438651084118649e-05, - "loss": 0.6678, - "step": 105970 - }, - { - "epoch": 0.9368977527891229, - "grad_norm": 10.033136367797852, - "learning_rate": 3.438503745351462e-05, - "loss": 0.5729, - "step": 105980 - }, - { - "epoch": 0.9369861560494351, - "grad_norm": 1.7395009994506836, - "learning_rate": 3.4383564065842746e-05, - "loss": 0.5782, - "step": 105990 - }, - { - "epoch": 0.9370745593097474, - "grad_norm": 8.441571235656738, - "learning_rate": 3.438209067817088e-05, - "loss": 0.7053, - "step": 106000 - }, - { - "epoch": 0.9371629625700596, - "grad_norm": 4.995687007904053, - "learning_rate": 3.438061729049901e-05, - "loss": 0.658, - "step": 106010 - }, - { - "epoch": 0.9372513658303718, - "grad_norm": 3.8771445751190186, - "learning_rate": 3.437914390282714e-05, - "loss": 0.7503, - "step": 106020 - }, - { - "epoch": 0.9373397690906841, - "grad_norm": 3.065551280975342, - "learning_rate": 3.4377670515155266e-05, - "loss": 0.6886, - "step": 106030 - }, - { - "epoch": 0.9374281723509963, - "grad_norm": 3.868722915649414, - "learning_rate": 3.4376197127483395e-05, - "loss": 0.5327, - "step": 106040 - }, - { - "epoch": 0.9375165756113085, - "grad_norm": 1.6570860147476196, - "learning_rate": 3.437472373981152e-05, - "loss": 0.5965, - "step": 106050 - }, - { - "epoch": 0.9376049788716208, - "grad_norm": 7.585522651672363, - "learning_rate": 3.437325035213966e-05, - "loss": 0.6274, - "step": 106060 - }, - { - "epoch": 0.937693382131933, - "grad_norm": 5.415796279907227, - "learning_rate": 3.437177696446779e-05, - "loss": 0.7436, - "step": 106070 - }, - { - "epoch": 0.9377817853922452, - "grad_norm": 4.855235576629639, - "learning_rate": 3.4370303576795915e-05, - "loss": 0.7077, - "step": 106080 - }, - { - "epoch": 0.9378701886525576, - "grad_norm": 1.1730399131774902, - "learning_rate": 3.436883018912404e-05, - "loss": 0.5663, - "step": 106090 - }, - { - "epoch": 0.9379585919128698, - "grad_norm": 13.284073829650879, - "learning_rate": 3.436735680145217e-05, - "loss": 0.599, - "step": 106100 - }, - { - "epoch": 0.938046995173182, - "grad_norm": 3.193488359451294, - "learning_rate": 3.43658834137803e-05, - "loss": 0.642, - "step": 106110 - }, - { - "epoch": 0.9381353984334942, - "grad_norm": 12.354021072387695, - "learning_rate": 3.436441002610843e-05, - "loss": 0.588, - "step": 106120 - }, - { - "epoch": 0.9382238016938065, - "grad_norm": 2.084394931793213, - "learning_rate": 3.4362936638436564e-05, - "loss": 0.6375, - "step": 106130 - }, - { - "epoch": 0.9383122049541187, - "grad_norm": 7.386837005615234, - "learning_rate": 3.436146325076469e-05, - "loss": 0.7165, - "step": 106140 - }, - { - "epoch": 0.9384006082144309, - "grad_norm": 12.498074531555176, - "learning_rate": 3.435998986309282e-05, - "loss": 0.6787, - "step": 106150 - }, - { - "epoch": 0.9384890114747432, - "grad_norm": 2.8941307067871094, - "learning_rate": 3.435851647542095e-05, - "loss": 0.7384, - "step": 106160 - }, - { - "epoch": 0.9385774147350554, - "grad_norm": 6.089250087738037, - "learning_rate": 3.435704308774908e-05, - "loss": 0.6384, - "step": 106170 - }, - { - "epoch": 0.9386658179953676, - "grad_norm": 3.2717485427856445, - "learning_rate": 3.4355569700077205e-05, - "loss": 0.6729, - "step": 106180 - }, - { - "epoch": 0.9387542212556799, - "grad_norm": 2.67104172706604, - "learning_rate": 3.435409631240534e-05, - "loss": 0.6155, - "step": 106190 - }, - { - "epoch": 0.9388426245159921, - "grad_norm": 3.3363802433013916, - "learning_rate": 3.435262292473347e-05, - "loss": 0.5862, - "step": 106200 - }, - { - "epoch": 0.9389310277763044, - "grad_norm": 2.501697301864624, - "learning_rate": 3.43511495370616e-05, - "loss": 0.7169, - "step": 106210 - }, - { - "epoch": 0.9390194310366167, - "grad_norm": 3.346224069595337, - "learning_rate": 3.4349676149389726e-05, - "loss": 0.6054, - "step": 106220 - }, - { - "epoch": 0.9391078342969289, - "grad_norm": 2.4818291664123535, - "learning_rate": 3.4348202761717854e-05, - "loss": 0.7401, - "step": 106230 - }, - { - "epoch": 0.9391962375572411, - "grad_norm": 3.6289844512939453, - "learning_rate": 3.434672937404598e-05, - "loss": 0.7041, - "step": 106240 - }, - { - "epoch": 0.9392846408175534, - "grad_norm": 1.087025761604309, - "learning_rate": 3.434525598637412e-05, - "loss": 0.7006, - "step": 106250 - }, - { - "epoch": 0.9393730440778656, - "grad_norm": 8.719230651855469, - "learning_rate": 3.434378259870224e-05, - "loss": 0.6008, - "step": 106260 - }, - { - "epoch": 0.9394614473381778, - "grad_norm": 4.29959774017334, - "learning_rate": 3.4342309211030374e-05, - "loss": 0.6814, - "step": 106270 - }, - { - "epoch": 0.93954985059849, - "grad_norm": 1.403823733329773, - "learning_rate": 3.43408358233585e-05, - "loss": 0.6274, - "step": 106280 - }, - { - "epoch": 0.9396382538588023, - "grad_norm": 4.81955623626709, - "learning_rate": 3.433936243568663e-05, - "loss": 0.6377, - "step": 106290 - }, - { - "epoch": 0.9397266571191145, - "grad_norm": 7.307697296142578, - "learning_rate": 3.433788904801476e-05, - "loss": 0.5989, - "step": 106300 - }, - { - "epoch": 0.9398150603794267, - "grad_norm": 8.821192741394043, - "learning_rate": 3.4336415660342894e-05, - "loss": 0.5647, - "step": 106310 - }, - { - "epoch": 0.939903463639739, - "grad_norm": 3.9636425971984863, - "learning_rate": 3.4334942272671016e-05, - "loss": 0.6552, - "step": 106320 - }, - { - "epoch": 0.9399918669000513, - "grad_norm": 2.2295689582824707, - "learning_rate": 3.433346888499915e-05, - "loss": 0.6519, - "step": 106330 - }, - { - "epoch": 0.9400802701603636, - "grad_norm": 2.579984426498413, - "learning_rate": 3.433199549732727e-05, - "loss": 0.6199, - "step": 106340 - }, - { - "epoch": 0.9401686734206758, - "grad_norm": 10.387164115905762, - "learning_rate": 3.433052210965541e-05, - "loss": 0.5651, - "step": 106350 - }, - { - "epoch": 0.940257076680988, - "grad_norm": 3.2856268882751465, - "learning_rate": 3.4329048721983536e-05, - "loss": 0.6841, - "step": 106360 - }, - { - "epoch": 0.9403454799413002, - "grad_norm": 1.4172332286834717, - "learning_rate": 3.4327575334311664e-05, - "loss": 0.6082, - "step": 106370 - }, - { - "epoch": 0.9404338832016125, - "grad_norm": 5.022871017456055, - "learning_rate": 3.432610194663979e-05, - "loss": 0.6535, - "step": 106380 - }, - { - "epoch": 0.9405222864619247, - "grad_norm": 11.879000663757324, - "learning_rate": 3.432462855896793e-05, - "loss": 0.6264, - "step": 106390 - }, - { - "epoch": 0.9406106897222369, - "grad_norm": 2.9734277725219727, - "learning_rate": 3.432315517129605e-05, - "loss": 0.637, - "step": 106400 - }, - { - "epoch": 0.9406990929825492, - "grad_norm": 5.496205806732178, - "learning_rate": 3.4321681783624185e-05, - "loss": 0.6466, - "step": 106410 - }, - { - "epoch": 0.9407874962428614, - "grad_norm": 2.9897515773773193, - "learning_rate": 3.432020839595231e-05, - "loss": 0.7122, - "step": 106420 - }, - { - "epoch": 0.9408758995031736, - "grad_norm": 1.718722939491272, - "learning_rate": 3.431873500828044e-05, - "loss": 0.5625, - "step": 106430 - }, - { - "epoch": 0.940964302763486, - "grad_norm": 1.3780550956726074, - "learning_rate": 3.431726162060857e-05, - "loss": 0.5758, - "step": 106440 - }, - { - "epoch": 0.9410527060237982, - "grad_norm": 4.742579460144043, - "learning_rate": 3.43157882329367e-05, - "loss": 0.6305, - "step": 106450 - }, - { - "epoch": 0.9411411092841104, - "grad_norm": 5.992853164672852, - "learning_rate": 3.4314314845264826e-05, - "loss": 0.6176, - "step": 106460 - }, - { - "epoch": 0.9412295125444227, - "grad_norm": 4.425865173339844, - "learning_rate": 3.431284145759296e-05, - "loss": 0.609, - "step": 106470 - }, - { - "epoch": 0.9413179158047349, - "grad_norm": 1.596043348312378, - "learning_rate": 3.431136806992108e-05, - "loss": 0.6324, - "step": 106480 - }, - { - "epoch": 0.9414063190650471, - "grad_norm": 8.001104354858398, - "learning_rate": 3.430989468224922e-05, - "loss": 0.6937, - "step": 106490 - }, - { - "epoch": 0.9414947223253594, - "grad_norm": 5.724034786224365, - "learning_rate": 3.4308421294577347e-05, - "loss": 0.5984, - "step": 106500 - }, - { - "epoch": 0.9415831255856716, - "grad_norm": 10.905638694763184, - "learning_rate": 3.4306947906905475e-05, - "loss": 0.6456, - "step": 106510 - }, - { - "epoch": 0.9416715288459838, - "grad_norm": 2.4138989448547363, - "learning_rate": 3.43054745192336e-05, - "loss": 0.6583, - "step": 106520 - }, - { - "epoch": 0.941759932106296, - "grad_norm": 5.834841728210449, - "learning_rate": 3.430400113156174e-05, - "loss": 0.7593, - "step": 106530 - }, - { - "epoch": 0.9418483353666083, - "grad_norm": 4.196831226348877, - "learning_rate": 3.430252774388986e-05, - "loss": 0.5705, - "step": 106540 - }, - { - "epoch": 0.9419367386269205, - "grad_norm": 2.742556095123291, - "learning_rate": 3.4301054356217995e-05, - "loss": 0.5533, - "step": 106550 - }, - { - "epoch": 0.9420251418872329, - "grad_norm": 11.296940803527832, - "learning_rate": 3.429958096854612e-05, - "loss": 0.5298, - "step": 106560 - }, - { - "epoch": 0.9421135451475451, - "grad_norm": 3.208120107650757, - "learning_rate": 3.429810758087425e-05, - "loss": 0.6616, - "step": 106570 - }, - { - "epoch": 0.9422019484078573, - "grad_norm": 3.070038318634033, - "learning_rate": 3.429663419320238e-05, - "loss": 0.5938, - "step": 106580 - }, - { - "epoch": 0.9422903516681695, - "grad_norm": 8.327720642089844, - "learning_rate": 3.429516080553051e-05, - "loss": 0.7682, - "step": 106590 - }, - { - "epoch": 0.9423787549284818, - "grad_norm": 4.176601409912109, - "learning_rate": 3.429368741785864e-05, - "loss": 0.602, - "step": 106600 - }, - { - "epoch": 0.942467158188794, - "grad_norm": 4.963527679443359, - "learning_rate": 3.429221403018677e-05, - "loss": 0.6014, - "step": 106610 - }, - { - "epoch": 0.9425555614491062, - "grad_norm": 1.4309555292129517, - "learning_rate": 3.4290740642514894e-05, - "loss": 0.571, - "step": 106620 - }, - { - "epoch": 0.9426439647094185, - "grad_norm": 0.8592166304588318, - "learning_rate": 3.428926725484303e-05, - "loss": 0.7284, - "step": 106630 - }, - { - "epoch": 0.9427323679697307, - "grad_norm": 10.35663890838623, - "learning_rate": 3.428779386717116e-05, - "loss": 0.638, - "step": 106640 - }, - { - "epoch": 0.9428207712300429, - "grad_norm": 2.0613021850585938, - "learning_rate": 3.4286320479499285e-05, - "loss": 0.5058, - "step": 106650 - }, - { - "epoch": 0.9429091744903552, - "grad_norm": 1.8221769332885742, - "learning_rate": 3.4284847091827414e-05, - "loss": 0.7491, - "step": 106660 - }, - { - "epoch": 0.9429975777506674, - "grad_norm": 3.9394474029541016, - "learning_rate": 3.428337370415555e-05, - "loss": 0.5745, - "step": 106670 - }, - { - "epoch": 0.9430859810109797, - "grad_norm": 1.4672960042953491, - "learning_rate": 3.428190031648367e-05, - "loss": 0.5828, - "step": 106680 - }, - { - "epoch": 0.943174384271292, - "grad_norm": 1.6499518156051636, - "learning_rate": 3.4280426928811806e-05, - "loss": 0.5971, - "step": 106690 - }, - { - "epoch": 0.9432627875316042, - "grad_norm": 5.15764856338501, - "learning_rate": 3.427895354113993e-05, - "loss": 0.7189, - "step": 106700 - }, - { - "epoch": 0.9433511907919164, - "grad_norm": 4.637803554534912, - "learning_rate": 3.427748015346806e-05, - "loss": 0.587, - "step": 106710 - }, - { - "epoch": 0.9434395940522287, - "grad_norm": 2.552391767501831, - "learning_rate": 3.427600676579619e-05, - "loss": 0.6569, - "step": 106720 - }, - { - "epoch": 0.9435279973125409, - "grad_norm": 4.291319370269775, - "learning_rate": 3.427453337812432e-05, - "loss": 0.7166, - "step": 106730 - }, - { - "epoch": 0.9436164005728531, - "grad_norm": 3.2860984802246094, - "learning_rate": 3.427305999045245e-05, - "loss": 0.6575, - "step": 106740 - }, - { - "epoch": 0.9437048038331654, - "grad_norm": 3.7059011459350586, - "learning_rate": 3.427158660278058e-05, - "loss": 0.5796, - "step": 106750 - }, - { - "epoch": 0.9437932070934776, - "grad_norm": 3.594114303588867, - "learning_rate": 3.4270113215108704e-05, - "loss": 0.6662, - "step": 106760 - }, - { - "epoch": 0.9438816103537898, - "grad_norm": 2.9780309200286865, - "learning_rate": 3.426863982743684e-05, - "loss": 0.5473, - "step": 106770 - }, - { - "epoch": 0.943970013614102, - "grad_norm": 11.914055824279785, - "learning_rate": 3.426716643976497e-05, - "loss": 0.6997, - "step": 106780 - }, - { - "epoch": 0.9440584168744143, - "grad_norm": 1.9557054042816162, - "learning_rate": 3.4265693052093096e-05, - "loss": 0.6747, - "step": 106790 - }, - { - "epoch": 0.9441468201347266, - "grad_norm": 6.067456245422363, - "learning_rate": 3.4264219664421224e-05, - "loss": 0.6688, - "step": 106800 - }, - { - "epoch": 0.9442352233950388, - "grad_norm": 1.1976491212844849, - "learning_rate": 3.426274627674935e-05, - "loss": 0.664, - "step": 106810 - }, - { - "epoch": 0.9443236266553511, - "grad_norm": 4.096565246582031, - "learning_rate": 3.426127288907748e-05, - "loss": 0.6322, - "step": 106820 - }, - { - "epoch": 0.9444120299156633, - "grad_norm": 17.041379928588867, - "learning_rate": 3.4259799501405616e-05, - "loss": 0.6444, - "step": 106830 - }, - { - "epoch": 0.9445004331759755, - "grad_norm": 1.333940029144287, - "learning_rate": 3.425832611373374e-05, - "loss": 0.5801, - "step": 106840 - }, - { - "epoch": 0.9445888364362878, - "grad_norm": 2.1787521839141846, - "learning_rate": 3.425685272606187e-05, - "loss": 0.5083, - "step": 106850 - }, - { - "epoch": 0.9446772396966, - "grad_norm": 1.6780534982681274, - "learning_rate": 3.425537933839e-05, - "loss": 0.6349, - "step": 106860 - }, - { - "epoch": 0.9447656429569122, - "grad_norm": 5.518852710723877, - "learning_rate": 3.425390595071813e-05, - "loss": 0.6507, - "step": 106870 - }, - { - "epoch": 0.9448540462172245, - "grad_norm": 7.130365371704102, - "learning_rate": 3.425243256304626e-05, - "loss": 0.5616, - "step": 106880 - }, - { - "epoch": 0.9449424494775367, - "grad_norm": 3.413080930709839, - "learning_rate": 3.425095917537439e-05, - "loss": 0.588, - "step": 106890 - }, - { - "epoch": 0.9450308527378489, - "grad_norm": 0.8026190400123596, - "learning_rate": 3.4249485787702515e-05, - "loss": 0.6704, - "step": 106900 - }, - { - "epoch": 0.9451192559981613, - "grad_norm": 1.9579423666000366, - "learning_rate": 3.424801240003065e-05, - "loss": 0.6763, - "step": 106910 - }, - { - "epoch": 0.9452076592584735, - "grad_norm": 5.837519645690918, - "learning_rate": 3.424653901235878e-05, - "loss": 0.7122, - "step": 106920 - }, - { - "epoch": 0.9452960625187857, - "grad_norm": 5.711021423339844, - "learning_rate": 3.4245065624686906e-05, - "loss": 0.6986, - "step": 106930 - }, - { - "epoch": 0.945384465779098, - "grad_norm": 1.944549560546875, - "learning_rate": 3.4243592237015035e-05, - "loss": 0.6563, - "step": 106940 - }, - { - "epoch": 0.9454728690394102, - "grad_norm": 5.686873435974121, - "learning_rate": 3.424211884934316e-05, - "loss": 0.5243, - "step": 106950 - }, - { - "epoch": 0.9455612722997224, - "grad_norm": 4.338107585906982, - "learning_rate": 3.424064546167129e-05, - "loss": 0.6427, - "step": 106960 - }, - { - "epoch": 0.9456496755600347, - "grad_norm": 5.108939170837402, - "learning_rate": 3.4239172073999427e-05, - "loss": 0.657, - "step": 106970 - }, - { - "epoch": 0.9457380788203469, - "grad_norm": 9.524923324584961, - "learning_rate": 3.4237698686327555e-05, - "loss": 0.6514, - "step": 106980 - }, - { - "epoch": 0.9458264820806591, - "grad_norm": 11.318289756774902, - "learning_rate": 3.423622529865568e-05, - "loss": 0.6222, - "step": 106990 - }, - { - "epoch": 0.9459148853409713, - "grad_norm": 1.8790006637573242, - "learning_rate": 3.423475191098381e-05, - "loss": 0.5543, - "step": 107000 - }, - { - "epoch": 0.9460032886012836, - "grad_norm": 5.158295154571533, - "learning_rate": 3.423327852331194e-05, - "loss": 0.5889, - "step": 107010 - }, - { - "epoch": 0.9460916918615958, - "grad_norm": 2.221982717514038, - "learning_rate": 3.423180513564007e-05, - "loss": 0.6793, - "step": 107020 - }, - { - "epoch": 0.9461800951219081, - "grad_norm": 5.159732341766357, - "learning_rate": 3.42303317479682e-05, - "loss": 0.5866, - "step": 107030 - }, - { - "epoch": 0.9462684983822204, - "grad_norm": 1.9355392456054688, - "learning_rate": 3.422885836029633e-05, - "loss": 0.7064, - "step": 107040 - }, - { - "epoch": 0.9463569016425326, - "grad_norm": 13.628268241882324, - "learning_rate": 3.422738497262446e-05, - "loss": 0.774, - "step": 107050 - }, - { - "epoch": 0.9464453049028448, - "grad_norm": 2.6042873859405518, - "learning_rate": 3.422591158495259e-05, - "loss": 0.6066, - "step": 107060 - }, - { - "epoch": 0.9465337081631571, - "grad_norm": 7.6928815841674805, - "learning_rate": 3.422443819728072e-05, - "loss": 0.6226, - "step": 107070 - }, - { - "epoch": 0.9466221114234693, - "grad_norm": 1.4809387922286987, - "learning_rate": 3.4222964809608845e-05, - "loss": 0.567, - "step": 107080 - }, - { - "epoch": 0.9467105146837815, - "grad_norm": 2.3960022926330566, - "learning_rate": 3.4221491421936974e-05, - "loss": 0.6553, - "step": 107090 - }, - { - "epoch": 0.9467989179440938, - "grad_norm": 2.232804298400879, - "learning_rate": 3.422001803426511e-05, - "loss": 0.6052, - "step": 107100 - }, - { - "epoch": 0.946887321204406, - "grad_norm": 1.6408066749572754, - "learning_rate": 3.421854464659324e-05, - "loss": 0.6134, - "step": 107110 - }, - { - "epoch": 0.9469757244647182, - "grad_norm": 1.4858996868133545, - "learning_rate": 3.4217071258921365e-05, - "loss": 0.6503, - "step": 107120 - }, - { - "epoch": 0.9470641277250305, - "grad_norm": 2.2252790927886963, - "learning_rate": 3.4215597871249494e-05, - "loss": 0.7173, - "step": 107130 - }, - { - "epoch": 0.9471525309853427, - "grad_norm": 4.65252161026001, - "learning_rate": 3.421412448357762e-05, - "loss": 0.6642, - "step": 107140 - }, - { - "epoch": 0.947240934245655, - "grad_norm": 4.499752044677734, - "learning_rate": 3.421265109590575e-05, - "loss": 0.6111, - "step": 107150 - }, - { - "epoch": 0.9473293375059673, - "grad_norm": 1.6032524108886719, - "learning_rate": 3.4211177708233886e-05, - "loss": 0.6872, - "step": 107160 - }, - { - "epoch": 0.9474177407662795, - "grad_norm": 5.613959789276123, - "learning_rate": 3.420970432056201e-05, - "loss": 0.6062, - "step": 107170 - }, - { - "epoch": 0.9475061440265917, - "grad_norm": 1.392574429512024, - "learning_rate": 3.420823093289014e-05, - "loss": 0.6558, - "step": 107180 - }, - { - "epoch": 0.947594547286904, - "grad_norm": 1.549219012260437, - "learning_rate": 3.420675754521827e-05, - "loss": 0.6022, - "step": 107190 - }, - { - "epoch": 0.9476829505472162, - "grad_norm": 6.8509650230407715, - "learning_rate": 3.42052841575464e-05, - "loss": 0.6933, - "step": 107200 - }, - { - "epoch": 0.9477713538075284, - "grad_norm": 9.156034469604492, - "learning_rate": 3.420381076987453e-05, - "loss": 0.6242, - "step": 107210 - }, - { - "epoch": 0.9478597570678406, - "grad_norm": 6.534549236297607, - "learning_rate": 3.420233738220266e-05, - "loss": 0.6654, - "step": 107220 - }, - { - "epoch": 0.9479481603281529, - "grad_norm": 2.9824163913726807, - "learning_rate": 3.4200863994530784e-05, - "loss": 0.5776, - "step": 107230 - }, - { - "epoch": 0.9480365635884651, - "grad_norm": 7.644856929779053, - "learning_rate": 3.419939060685892e-05, - "loss": 0.7317, - "step": 107240 - }, - { - "epoch": 0.9481249668487773, - "grad_norm": 1.7401103973388672, - "learning_rate": 3.419791721918705e-05, - "loss": 0.5825, - "step": 107250 - }, - { - "epoch": 0.9482133701090896, - "grad_norm": 3.100944757461548, - "learning_rate": 3.4196443831515176e-05, - "loss": 0.6495, - "step": 107260 - }, - { - "epoch": 0.9483017733694019, - "grad_norm": 0.9705969095230103, - "learning_rate": 3.4194970443843304e-05, - "loss": 0.5088, - "step": 107270 - }, - { - "epoch": 0.9483901766297141, - "grad_norm": 5.885824203491211, - "learning_rate": 3.419349705617143e-05, - "loss": 0.6608, - "step": 107280 - }, - { - "epoch": 0.9484785798900264, - "grad_norm": 1.637331247329712, - "learning_rate": 3.419202366849956e-05, - "loss": 0.4585, - "step": 107290 - }, - { - "epoch": 0.9485669831503386, - "grad_norm": 7.709097862243652, - "learning_rate": 3.4190550280827696e-05, - "loss": 0.5675, - "step": 107300 - }, - { - "epoch": 0.9486553864106508, - "grad_norm": 13.357598304748535, - "learning_rate": 3.418907689315582e-05, - "loss": 0.7206, - "step": 107310 - }, - { - "epoch": 0.9487437896709631, - "grad_norm": 3.9393203258514404, - "learning_rate": 3.418760350548395e-05, - "loss": 0.653, - "step": 107320 - }, - { - "epoch": 0.9488321929312753, - "grad_norm": 5.50761079788208, - "learning_rate": 3.418613011781208e-05, - "loss": 0.6621, - "step": 107330 - }, - { - "epoch": 0.9489205961915875, - "grad_norm": 2.243089199066162, - "learning_rate": 3.418465673014021e-05, - "loss": 0.7987, - "step": 107340 - }, - { - "epoch": 0.9490089994518998, - "grad_norm": 1.6311918497085571, - "learning_rate": 3.418318334246834e-05, - "loss": 0.6362, - "step": 107350 - }, - { - "epoch": 0.949097402712212, - "grad_norm": 4.786495208740234, - "learning_rate": 3.418170995479647e-05, - "loss": 0.6874, - "step": 107360 - }, - { - "epoch": 0.9491858059725242, - "grad_norm": 2.9045217037200928, - "learning_rate": 3.4180236567124595e-05, - "loss": 0.6305, - "step": 107370 - }, - { - "epoch": 0.9492742092328365, - "grad_norm": 1.2427177429199219, - "learning_rate": 3.417876317945273e-05, - "loss": 0.6014, - "step": 107380 - }, - { - "epoch": 0.9493626124931488, - "grad_norm": 3.103736162185669, - "learning_rate": 3.417728979178085e-05, - "loss": 0.6472, - "step": 107390 - }, - { - "epoch": 0.949451015753461, - "grad_norm": 2.277155637741089, - "learning_rate": 3.4175816404108986e-05, - "loss": 0.6705, - "step": 107400 - }, - { - "epoch": 0.9495394190137733, - "grad_norm": 4.957957744598389, - "learning_rate": 3.4174343016437115e-05, - "loss": 0.7479, - "step": 107410 - }, - { - "epoch": 0.9496278222740855, - "grad_norm": 9.334573745727539, - "learning_rate": 3.417286962876524e-05, - "loss": 0.7009, - "step": 107420 - }, - { - "epoch": 0.9497162255343977, - "grad_norm": 4.930044174194336, - "learning_rate": 3.417139624109337e-05, - "loss": 0.6366, - "step": 107430 - }, - { - "epoch": 0.94980462879471, - "grad_norm": 2.5144917964935303, - "learning_rate": 3.416992285342151e-05, - "loss": 0.5848, - "step": 107440 - }, - { - "epoch": 0.9498930320550222, - "grad_norm": 2.3998749256134033, - "learning_rate": 3.416844946574963e-05, - "loss": 0.489, - "step": 107450 - }, - { - "epoch": 0.9499814353153344, - "grad_norm": 3.059237003326416, - "learning_rate": 3.416697607807776e-05, - "loss": 0.6635, - "step": 107460 - }, - { - "epoch": 0.9500698385756466, - "grad_norm": 1.5679110288619995, - "learning_rate": 3.416550269040589e-05, - "loss": 0.7331, - "step": 107470 - }, - { - "epoch": 0.9501582418359589, - "grad_norm": 7.488781929016113, - "learning_rate": 3.416402930273402e-05, - "loss": 0.5985, - "step": 107480 - }, - { - "epoch": 0.9502466450962711, - "grad_norm": 2.71647572517395, - "learning_rate": 3.416255591506215e-05, - "loss": 0.5026, - "step": 107490 - }, - { - "epoch": 0.9503350483565834, - "grad_norm": 1.9849530458450317, - "learning_rate": 3.416108252739028e-05, - "loss": 0.5606, - "step": 107500 - }, - { - "epoch": 0.9504234516168957, - "grad_norm": 10.521023750305176, - "learning_rate": 3.4159609139718405e-05, - "loss": 0.5648, - "step": 107510 - }, - { - "epoch": 0.9505118548772079, - "grad_norm": 2.396305561065674, - "learning_rate": 3.415813575204654e-05, - "loss": 0.6881, - "step": 107520 - }, - { - "epoch": 0.9506002581375201, - "grad_norm": 3.7700295448303223, - "learning_rate": 3.415666236437466e-05, - "loss": 0.5457, - "step": 107530 - }, - { - "epoch": 0.9506886613978324, - "grad_norm": 11.179518699645996, - "learning_rate": 3.41551889767028e-05, - "loss": 0.7302, - "step": 107540 - }, - { - "epoch": 0.9507770646581446, - "grad_norm": 1.3473808765411377, - "learning_rate": 3.4153715589030925e-05, - "loss": 0.6218, - "step": 107550 - }, - { - "epoch": 0.9508654679184568, - "grad_norm": 1.59479820728302, - "learning_rate": 3.4152242201359054e-05, - "loss": 0.6327, - "step": 107560 - }, - { - "epoch": 0.9509538711787691, - "grad_norm": 8.574318885803223, - "learning_rate": 3.415076881368718e-05, - "loss": 0.5492, - "step": 107570 - }, - { - "epoch": 0.9510422744390813, - "grad_norm": 1.1620367765426636, - "learning_rate": 3.414929542601532e-05, - "loss": 0.579, - "step": 107580 - }, - { - "epoch": 0.9511306776993935, - "grad_norm": 3.6802213191986084, - "learning_rate": 3.414782203834344e-05, - "loss": 0.6158, - "step": 107590 - }, - { - "epoch": 0.9512190809597058, - "grad_norm": 9.622323989868164, - "learning_rate": 3.4146348650671574e-05, - "loss": 0.7037, - "step": 107600 - }, - { - "epoch": 0.951307484220018, - "grad_norm": 6.923878192901611, - "learning_rate": 3.41448752629997e-05, - "loss": 0.7099, - "step": 107610 - }, - { - "epoch": 0.9513958874803303, - "grad_norm": 3.974891185760498, - "learning_rate": 3.414340187532783e-05, - "loss": 0.6255, - "step": 107620 - }, - { - "epoch": 0.9514842907406426, - "grad_norm": 12.173301696777344, - "learning_rate": 3.414192848765596e-05, - "loss": 0.6575, - "step": 107630 - }, - { - "epoch": 0.9515726940009548, - "grad_norm": 4.852212905883789, - "learning_rate": 3.414045509998409e-05, - "loss": 0.5951, - "step": 107640 - }, - { - "epoch": 0.951661097261267, - "grad_norm": 5.011682033538818, - "learning_rate": 3.4138981712312216e-05, - "loss": 0.6038, - "step": 107650 - }, - { - "epoch": 0.9517495005215793, - "grad_norm": 6.1992878913879395, - "learning_rate": 3.413750832464035e-05, - "loss": 0.5561, - "step": 107660 - }, - { - "epoch": 0.9518379037818915, - "grad_norm": 20.937747955322266, - "learning_rate": 3.413603493696847e-05, - "loss": 0.5559, - "step": 107670 - }, - { - "epoch": 0.9519263070422037, - "grad_norm": 11.46400260925293, - "learning_rate": 3.413456154929661e-05, - "loss": 0.7077, - "step": 107680 - }, - { - "epoch": 0.9520147103025159, - "grad_norm": 2.0332183837890625, - "learning_rate": 3.4133088161624736e-05, - "loss": 0.7002, - "step": 107690 - }, - { - "epoch": 0.9521031135628282, - "grad_norm": 1.3499661684036255, - "learning_rate": 3.4131614773952864e-05, - "loss": 0.6974, - "step": 107700 - }, - { - "epoch": 0.9521915168231404, - "grad_norm": 3.8344931602478027, - "learning_rate": 3.413014138628099e-05, - "loss": 0.6379, - "step": 107710 - }, - { - "epoch": 0.9522799200834526, - "grad_norm": 6.694114685058594, - "learning_rate": 3.412866799860913e-05, - "loss": 0.4893, - "step": 107720 - }, - { - "epoch": 0.9523683233437649, - "grad_norm": 5.147834300994873, - "learning_rate": 3.412719461093725e-05, - "loss": 0.6722, - "step": 107730 - }, - { - "epoch": 0.9524567266040772, - "grad_norm": 8.246710777282715, - "learning_rate": 3.4125721223265384e-05, - "loss": 0.7812, - "step": 107740 - }, - { - "epoch": 0.9525451298643894, - "grad_norm": 10.205009460449219, - "learning_rate": 3.4124247835593506e-05, - "loss": 0.6962, - "step": 107750 - }, - { - "epoch": 0.9526335331247017, - "grad_norm": 5.004541873931885, - "learning_rate": 3.412277444792164e-05, - "loss": 0.6664, - "step": 107760 - }, - { - "epoch": 0.9527219363850139, - "grad_norm": 2.0937769412994385, - "learning_rate": 3.412130106024977e-05, - "loss": 0.6484, - "step": 107770 - }, - { - "epoch": 0.9528103396453261, - "grad_norm": 2.0680203437805176, - "learning_rate": 3.41198276725779e-05, - "loss": 0.6528, - "step": 107780 - }, - { - "epoch": 0.9528987429056384, - "grad_norm": 1.4519532918930054, - "learning_rate": 3.4118354284906026e-05, - "loss": 0.7094, - "step": 107790 - }, - { - "epoch": 0.9529871461659506, - "grad_norm": 1.2675204277038574, - "learning_rate": 3.411688089723416e-05, - "loss": 0.551, - "step": 107800 - }, - { - "epoch": 0.9530755494262628, - "grad_norm": 1.4857728481292725, - "learning_rate": 3.411540750956228e-05, - "loss": 0.6719, - "step": 107810 - }, - { - "epoch": 0.9531639526865751, - "grad_norm": 3.813077211380005, - "learning_rate": 3.411393412189042e-05, - "loss": 0.5846, - "step": 107820 - }, - { - "epoch": 0.9532523559468873, - "grad_norm": 5.566223621368408, - "learning_rate": 3.4112460734218546e-05, - "loss": 0.5991, - "step": 107830 - }, - { - "epoch": 0.9533407592071995, - "grad_norm": 3.6968796253204346, - "learning_rate": 3.4110987346546675e-05, - "loss": 0.6139, - "step": 107840 - }, - { - "epoch": 0.9534291624675117, - "grad_norm": 0.9310871958732605, - "learning_rate": 3.41095139588748e-05, - "loss": 0.6337, - "step": 107850 - }, - { - "epoch": 0.9535175657278241, - "grad_norm": 2.691727876663208, - "learning_rate": 3.410804057120293e-05, - "loss": 0.6461, - "step": 107860 - }, - { - "epoch": 0.9536059689881363, - "grad_norm": 1.6179885864257812, - "learning_rate": 3.410656718353106e-05, - "loss": 0.6521, - "step": 107870 - }, - { - "epoch": 0.9536943722484486, - "grad_norm": 1.3111066818237305, - "learning_rate": 3.4105093795859195e-05, - "loss": 0.6831, - "step": 107880 - }, - { - "epoch": 0.9537827755087608, - "grad_norm": 1.5552518367767334, - "learning_rate": 3.410362040818732e-05, - "loss": 0.7639, - "step": 107890 - }, - { - "epoch": 0.953871178769073, - "grad_norm": 9.723103523254395, - "learning_rate": 3.410214702051545e-05, - "loss": 0.5838, - "step": 107900 - }, - { - "epoch": 0.9539595820293852, - "grad_norm": 1.4131728410720825, - "learning_rate": 3.410067363284358e-05, - "loss": 0.7367, - "step": 107910 - }, - { - "epoch": 0.9540479852896975, - "grad_norm": 2.871356248855591, - "learning_rate": 3.409920024517171e-05, - "loss": 0.6817, - "step": 107920 - }, - { - "epoch": 0.9541363885500097, - "grad_norm": 4.903386116027832, - "learning_rate": 3.409772685749984e-05, - "loss": 0.5768, - "step": 107930 - }, - { - "epoch": 0.9542247918103219, - "grad_norm": 1.263875961303711, - "learning_rate": 3.409625346982797e-05, - "loss": 0.6812, - "step": 107940 - }, - { - "epoch": 0.9543131950706342, - "grad_norm": 20.965744018554688, - "learning_rate": 3.40947800821561e-05, - "loss": 0.7873, - "step": 107950 - }, - { - "epoch": 0.9544015983309464, - "grad_norm": 1.4810017347335815, - "learning_rate": 3.409330669448423e-05, - "loss": 0.5582, - "step": 107960 - }, - { - "epoch": 0.9544900015912587, - "grad_norm": 1.8919059038162231, - "learning_rate": 3.409183330681236e-05, - "loss": 0.6934, - "step": 107970 - }, - { - "epoch": 0.954578404851571, - "grad_norm": 3.1167871952056885, - "learning_rate": 3.4090359919140485e-05, - "loss": 0.7143, - "step": 107980 - }, - { - "epoch": 0.9546668081118832, - "grad_norm": 2.9333460330963135, - "learning_rate": 3.4088886531468614e-05, - "loss": 0.6604, - "step": 107990 - }, - { - "epoch": 0.9547552113721954, - "grad_norm": 13.716352462768555, - "learning_rate": 3.408741314379674e-05, - "loss": 0.737, - "step": 108000 - }, - { - "epoch": 0.9548436146325077, - "grad_norm": 2.1939032077789307, - "learning_rate": 3.408593975612488e-05, - "loss": 0.7201, - "step": 108010 - }, - { - "epoch": 0.9549320178928199, - "grad_norm": 3.350382089614868, - "learning_rate": 3.4084466368453005e-05, - "loss": 0.7804, - "step": 108020 - }, - { - "epoch": 0.9550204211531321, - "grad_norm": 2.7359728813171387, - "learning_rate": 3.4082992980781134e-05, - "loss": 0.6383, - "step": 108030 - }, - { - "epoch": 0.9551088244134444, - "grad_norm": 2.0213334560394287, - "learning_rate": 3.408151959310926e-05, - "loss": 0.7167, - "step": 108040 - }, - { - "epoch": 0.9551972276737566, - "grad_norm": 1.583143711090088, - "learning_rate": 3.408004620543739e-05, - "loss": 0.7339, - "step": 108050 - }, - { - "epoch": 0.9552856309340688, - "grad_norm": 2.750746011734009, - "learning_rate": 3.407857281776552e-05, - "loss": 0.7033, - "step": 108060 - }, - { - "epoch": 0.955374034194381, - "grad_norm": 2.987856388092041, - "learning_rate": 3.4077099430093654e-05, - "loss": 0.5844, - "step": 108070 - }, - { - "epoch": 0.9554624374546933, - "grad_norm": 2.7189905643463135, - "learning_rate": 3.407562604242178e-05, - "loss": 0.8291, - "step": 108080 - }, - { - "epoch": 0.9555508407150056, - "grad_norm": 1.5544456243515015, - "learning_rate": 3.407415265474991e-05, - "loss": 0.6861, - "step": 108090 - }, - { - "epoch": 0.9556392439753179, - "grad_norm": 3.2795040607452393, - "learning_rate": 3.407267926707804e-05, - "loss": 0.707, - "step": 108100 - }, - { - "epoch": 0.9557276472356301, - "grad_norm": 1.6470195055007935, - "learning_rate": 3.407120587940617e-05, - "loss": 0.7609, - "step": 108110 - }, - { - "epoch": 0.9558160504959423, - "grad_norm": 7.1803507804870605, - "learning_rate": 3.4069732491734296e-05, - "loss": 0.7388, - "step": 108120 - }, - { - "epoch": 0.9559044537562545, - "grad_norm": 3.234081506729126, - "learning_rate": 3.406825910406243e-05, - "loss": 0.6063, - "step": 108130 - }, - { - "epoch": 0.9559928570165668, - "grad_norm": 0.897541344165802, - "learning_rate": 3.406678571639055e-05, - "loss": 0.5949, - "step": 108140 - }, - { - "epoch": 0.956081260276879, - "grad_norm": 2.718233585357666, - "learning_rate": 3.406531232871869e-05, - "loss": 0.584, - "step": 108150 - }, - { - "epoch": 0.9561696635371912, - "grad_norm": 3.2134108543395996, - "learning_rate": 3.4063838941046816e-05, - "loss": 0.6865, - "step": 108160 - }, - { - "epoch": 0.9562580667975035, - "grad_norm": 12.512566566467285, - "learning_rate": 3.4062365553374944e-05, - "loss": 0.5692, - "step": 108170 - }, - { - "epoch": 0.9563464700578157, - "grad_norm": 3.26753830909729, - "learning_rate": 3.406089216570307e-05, - "loss": 0.7239, - "step": 108180 - }, - { - "epoch": 0.9564348733181279, - "grad_norm": 4.158071041107178, - "learning_rate": 3.405941877803121e-05, - "loss": 0.5502, - "step": 108190 - }, - { - "epoch": 0.9565232765784402, - "grad_norm": 1.5002751350402832, - "learning_rate": 3.405794539035933e-05, - "loss": 0.7161, - "step": 108200 - }, - { - "epoch": 0.9566116798387525, - "grad_norm": 1.444059133529663, - "learning_rate": 3.4056472002687464e-05, - "loss": 0.6801, - "step": 108210 - }, - { - "epoch": 0.9567000830990647, - "grad_norm": 2.915778636932373, - "learning_rate": 3.4054998615015586e-05, - "loss": 0.5348, - "step": 108220 - }, - { - "epoch": 0.956788486359377, - "grad_norm": 1.5096100568771362, - "learning_rate": 3.405352522734372e-05, - "loss": 0.5295, - "step": 108230 - }, - { - "epoch": 0.9568768896196892, - "grad_norm": 4.211172103881836, - "learning_rate": 3.405205183967185e-05, - "loss": 0.5911, - "step": 108240 - }, - { - "epoch": 0.9569652928800014, - "grad_norm": 2.9278645515441895, - "learning_rate": 3.405057845199998e-05, - "loss": 0.7571, - "step": 108250 - }, - { - "epoch": 0.9570536961403137, - "grad_norm": 1.5982617139816284, - "learning_rate": 3.4049105064328106e-05, - "loss": 0.617, - "step": 108260 - }, - { - "epoch": 0.9571420994006259, - "grad_norm": 3.509449005126953, - "learning_rate": 3.404763167665624e-05, - "loss": 0.7241, - "step": 108270 - }, - { - "epoch": 0.9572305026609381, - "grad_norm": 5.647476673126221, - "learning_rate": 3.404615828898436e-05, - "loss": 0.6549, - "step": 108280 - }, - { - "epoch": 0.9573189059212504, - "grad_norm": 2.6125192642211914, - "learning_rate": 3.40446849013125e-05, - "loss": 0.7135, - "step": 108290 - }, - { - "epoch": 0.9574073091815626, - "grad_norm": 3.9579532146453857, - "learning_rate": 3.4043211513640626e-05, - "loss": 0.6965, - "step": 108300 - }, - { - "epoch": 0.9574957124418748, - "grad_norm": 4.575448989868164, - "learning_rate": 3.4041738125968755e-05, - "loss": 0.6583, - "step": 108310 - }, - { - "epoch": 0.957584115702187, - "grad_norm": 1.6923058032989502, - "learning_rate": 3.404026473829688e-05, - "loss": 0.6257, - "step": 108320 - }, - { - "epoch": 0.9576725189624994, - "grad_norm": 3.6821818351745605, - "learning_rate": 3.403879135062501e-05, - "loss": 0.6621, - "step": 108330 - }, - { - "epoch": 0.9577609222228116, - "grad_norm": 3.80184006690979, - "learning_rate": 3.403731796295314e-05, - "loss": 0.7091, - "step": 108340 - }, - { - "epoch": 0.9578493254831238, - "grad_norm": 2.4954137802124023, - "learning_rate": 3.4035844575281275e-05, - "loss": 0.6857, - "step": 108350 - }, - { - "epoch": 0.9579377287434361, - "grad_norm": 14.376485824584961, - "learning_rate": 3.4034371187609397e-05, - "loss": 0.6561, - "step": 108360 - }, - { - "epoch": 0.9580261320037483, - "grad_norm": 1.7742670774459839, - "learning_rate": 3.403289779993753e-05, - "loss": 0.6778, - "step": 108370 - }, - { - "epoch": 0.9581145352640605, - "grad_norm": 1.6553324460983276, - "learning_rate": 3.403142441226566e-05, - "loss": 0.694, - "step": 108380 - }, - { - "epoch": 0.9582029385243728, - "grad_norm": 2.247838020324707, - "learning_rate": 3.402995102459379e-05, - "loss": 0.6593, - "step": 108390 - }, - { - "epoch": 0.958291341784685, - "grad_norm": 9.521095275878906, - "learning_rate": 3.402847763692192e-05, - "loss": 0.6383, - "step": 108400 - }, - { - "epoch": 0.9583797450449972, - "grad_norm": 1.7256803512573242, - "learning_rate": 3.402700424925005e-05, - "loss": 0.7085, - "step": 108410 - }, - { - "epoch": 0.9584681483053095, - "grad_norm": 12.944317817687988, - "learning_rate": 3.4025530861578173e-05, - "loss": 0.7791, - "step": 108420 - }, - { - "epoch": 0.9585565515656217, - "grad_norm": 2.0162668228149414, - "learning_rate": 3.402405747390631e-05, - "loss": 0.6895, - "step": 108430 - }, - { - "epoch": 0.9586449548259339, - "grad_norm": 1.2397549152374268, - "learning_rate": 3.402258408623444e-05, - "loss": 0.5211, - "step": 108440 - }, - { - "epoch": 0.9587333580862463, - "grad_norm": 5.9702067375183105, - "learning_rate": 3.4021110698562565e-05, - "loss": 0.5716, - "step": 108450 - }, - { - "epoch": 0.9588217613465585, - "grad_norm": 3.423736095428467, - "learning_rate": 3.4019637310890694e-05, - "loss": 0.611, - "step": 108460 - }, - { - "epoch": 0.9589101646068707, - "grad_norm": 3.6585564613342285, - "learning_rate": 3.401816392321882e-05, - "loss": 0.6566, - "step": 108470 - }, - { - "epoch": 0.958998567867183, - "grad_norm": 8.351479530334473, - "learning_rate": 3.401669053554695e-05, - "loss": 0.6843, - "step": 108480 - }, - { - "epoch": 0.9590869711274952, - "grad_norm": 1.998821496963501, - "learning_rate": 3.4015217147875085e-05, - "loss": 0.7432, - "step": 108490 - }, - { - "epoch": 0.9591753743878074, - "grad_norm": 5.348984718322754, - "learning_rate": 3.401374376020321e-05, - "loss": 0.6328, - "step": 108500 - }, - { - "epoch": 0.9592637776481197, - "grad_norm": 8.680070877075195, - "learning_rate": 3.401227037253134e-05, - "loss": 0.6618, - "step": 108510 - }, - { - "epoch": 0.9593521809084319, - "grad_norm": 2.1394660472869873, - "learning_rate": 3.401079698485947e-05, - "loss": 0.556, - "step": 108520 - }, - { - "epoch": 0.9594405841687441, - "grad_norm": 5.013734340667725, - "learning_rate": 3.40093235971876e-05, - "loss": 0.8058, - "step": 108530 - }, - { - "epoch": 0.9595289874290563, - "grad_norm": 2.7199923992156982, - "learning_rate": 3.400785020951573e-05, - "loss": 0.7137, - "step": 108540 - }, - { - "epoch": 0.9596173906893686, - "grad_norm": 1.7995045185089111, - "learning_rate": 3.400637682184386e-05, - "loss": 0.5576, - "step": 108550 - }, - { - "epoch": 0.9597057939496809, - "grad_norm": 3.206547498703003, - "learning_rate": 3.4004903434171984e-05, - "loss": 0.5018, - "step": 108560 - }, - { - "epoch": 0.9597941972099931, - "grad_norm": 6.213366508483887, - "learning_rate": 3.400343004650012e-05, - "loss": 0.6283, - "step": 108570 - }, - { - "epoch": 0.9598826004703054, - "grad_norm": 8.289933204650879, - "learning_rate": 3.400195665882824e-05, - "loss": 0.6832, - "step": 108580 - }, - { - "epoch": 0.9599710037306176, - "grad_norm": 7.2982964515686035, - "learning_rate": 3.4000483271156376e-05, - "loss": 0.6606, - "step": 108590 - }, - { - "epoch": 0.9600594069909298, - "grad_norm": 5.151221752166748, - "learning_rate": 3.3999009883484504e-05, - "loss": 0.6167, - "step": 108600 - }, - { - "epoch": 0.9601478102512421, - "grad_norm": 2.101573944091797, - "learning_rate": 3.399753649581263e-05, - "loss": 0.6367, - "step": 108610 - }, - { - "epoch": 0.9602362135115543, - "grad_norm": 1.991464614868164, - "learning_rate": 3.399606310814076e-05, - "loss": 0.5753, - "step": 108620 - }, - { - "epoch": 0.9603246167718665, - "grad_norm": 2.156512498855591, - "learning_rate": 3.3994589720468896e-05, - "loss": 0.5939, - "step": 108630 - }, - { - "epoch": 0.9604130200321788, - "grad_norm": 1.273897409439087, - "learning_rate": 3.399311633279702e-05, - "loss": 0.6029, - "step": 108640 - }, - { - "epoch": 0.960501423292491, - "grad_norm": 5.283244609832764, - "learning_rate": 3.399164294512515e-05, - "loss": 0.6629, - "step": 108650 - }, - { - "epoch": 0.9605898265528032, - "grad_norm": 5.072693824768066, - "learning_rate": 3.399016955745328e-05, - "loss": 0.7361, - "step": 108660 - }, - { - "epoch": 0.9606782298131155, - "grad_norm": 5.455718517303467, - "learning_rate": 3.398869616978141e-05, - "loss": 0.7345, - "step": 108670 - }, - { - "epoch": 0.9607666330734278, - "grad_norm": 6.620666027069092, - "learning_rate": 3.398722278210954e-05, - "loss": 0.6886, - "step": 108680 - }, - { - "epoch": 0.96085503633374, - "grad_norm": 8.618492126464844, - "learning_rate": 3.3985749394437666e-05, - "loss": 0.7215, - "step": 108690 - }, - { - "epoch": 0.9609434395940523, - "grad_norm": 6.06873083114624, - "learning_rate": 3.3984276006765794e-05, - "loss": 0.6624, - "step": 108700 - }, - { - "epoch": 0.9610318428543645, - "grad_norm": 5.372738838195801, - "learning_rate": 3.398280261909393e-05, - "loss": 0.6533, - "step": 108710 - }, - { - "epoch": 0.9611202461146767, - "grad_norm": 1.5381653308868408, - "learning_rate": 3.398132923142205e-05, - "loss": 0.6658, - "step": 108720 - }, - { - "epoch": 0.961208649374989, - "grad_norm": 10.10377311706543, - "learning_rate": 3.3979855843750186e-05, - "loss": 0.8451, - "step": 108730 - }, - { - "epoch": 0.9612970526353012, - "grad_norm": 7.399298667907715, - "learning_rate": 3.3978382456078315e-05, - "loss": 0.6932, - "step": 108740 - }, - { - "epoch": 0.9613854558956134, - "grad_norm": 2.0910897254943848, - "learning_rate": 3.397690906840644e-05, - "loss": 0.5614, - "step": 108750 - }, - { - "epoch": 0.9614738591559256, - "grad_norm": 2.026648759841919, - "learning_rate": 3.397543568073457e-05, - "loss": 0.6436, - "step": 108760 - }, - { - "epoch": 0.9615622624162379, - "grad_norm": 2.5890772342681885, - "learning_rate": 3.3973962293062707e-05, - "loss": 0.6001, - "step": 108770 - }, - { - "epoch": 0.9616506656765501, - "grad_norm": 5.992710590362549, - "learning_rate": 3.397248890539083e-05, - "loss": 0.7304, - "step": 108780 - }, - { - "epoch": 0.9617390689368623, - "grad_norm": 3.517192840576172, - "learning_rate": 3.397101551771896e-05, - "loss": 0.7675, - "step": 108790 - }, - { - "epoch": 0.9618274721971747, - "grad_norm": 1.4823883771896362, - "learning_rate": 3.396954213004709e-05, - "loss": 0.5131, - "step": 108800 - }, - { - "epoch": 0.9619158754574869, - "grad_norm": 2.427386522293091, - "learning_rate": 3.396806874237522e-05, - "loss": 0.5861, - "step": 108810 - }, - { - "epoch": 0.9620042787177991, - "grad_norm": 1.6353212594985962, - "learning_rate": 3.396659535470335e-05, - "loss": 0.7368, - "step": 108820 - }, - { - "epoch": 0.9620926819781114, - "grad_norm": 4.7394280433654785, - "learning_rate": 3.396512196703148e-05, - "loss": 0.6409, - "step": 108830 - }, - { - "epoch": 0.9621810852384236, - "grad_norm": 1.347387433052063, - "learning_rate": 3.3963648579359605e-05, - "loss": 0.6165, - "step": 108840 - }, - { - "epoch": 0.9622694884987358, - "grad_norm": 5.459799289703369, - "learning_rate": 3.396217519168774e-05, - "loss": 0.6089, - "step": 108850 - }, - { - "epoch": 0.9623578917590481, - "grad_norm": 1.796870231628418, - "learning_rate": 3.396070180401587e-05, - "loss": 0.5329, - "step": 108860 - }, - { - "epoch": 0.9624462950193603, - "grad_norm": 7.6122846603393555, - "learning_rate": 3.3959228416344e-05, - "loss": 0.6, - "step": 108870 - }, - { - "epoch": 0.9625346982796725, - "grad_norm": 5.274385452270508, - "learning_rate": 3.3957755028672125e-05, - "loss": 0.6126, - "step": 108880 - }, - { - "epoch": 0.9626231015399848, - "grad_norm": 3.3590378761291504, - "learning_rate": 3.3956281641000254e-05, - "loss": 0.6181, - "step": 108890 - }, - { - "epoch": 0.962711504800297, - "grad_norm": 3.373491048812866, - "learning_rate": 3.395480825332838e-05, - "loss": 0.7215, - "step": 108900 - }, - { - "epoch": 0.9627999080606092, - "grad_norm": 4.337435245513916, - "learning_rate": 3.395333486565652e-05, - "loss": 0.6599, - "step": 108910 - }, - { - "epoch": 0.9628883113209216, - "grad_norm": 7.997978687286377, - "learning_rate": 3.3951861477984645e-05, - "loss": 0.6328, - "step": 108920 - }, - { - "epoch": 0.9629767145812338, - "grad_norm": 9.2746000289917, - "learning_rate": 3.3950388090312774e-05, - "loss": 0.7003, - "step": 108930 - }, - { - "epoch": 0.963065117841546, - "grad_norm": 3.2821590900421143, - "learning_rate": 3.39489147026409e-05, - "loss": 0.6253, - "step": 108940 - }, - { - "epoch": 0.9631535211018583, - "grad_norm": 1.6224504709243774, - "learning_rate": 3.394744131496903e-05, - "loss": 0.6933, - "step": 108950 - }, - { - "epoch": 0.9632419243621705, - "grad_norm": 2.4602506160736084, - "learning_rate": 3.394596792729716e-05, - "loss": 0.6175, - "step": 108960 - }, - { - "epoch": 0.9633303276224827, - "grad_norm": 3.0170340538024902, - "learning_rate": 3.394449453962529e-05, - "loss": 0.6648, - "step": 108970 - }, - { - "epoch": 0.963418730882795, - "grad_norm": 4.795486927032471, - "learning_rate": 3.394302115195342e-05, - "loss": 0.6548, - "step": 108980 - }, - { - "epoch": 0.9635071341431072, - "grad_norm": 0.5149299502372742, - "learning_rate": 3.394154776428155e-05, - "loss": 0.5897, - "step": 108990 - }, - { - "epoch": 0.9635955374034194, - "grad_norm": 2.1548361778259277, - "learning_rate": 3.394007437660968e-05, - "loss": 0.5996, - "step": 109000 - }, - { - "epoch": 0.9636839406637316, - "grad_norm": 1.3532441854476929, - "learning_rate": 3.393860098893781e-05, - "loss": 0.5124, - "step": 109010 - }, - { - "epoch": 0.9637723439240439, - "grad_norm": 4.723729133605957, - "learning_rate": 3.3937127601265936e-05, - "loss": 0.5618, - "step": 109020 - }, - { - "epoch": 0.9638607471843561, - "grad_norm": 2.310853958129883, - "learning_rate": 3.3935654213594064e-05, - "loss": 0.7577, - "step": 109030 - }, - { - "epoch": 0.9639491504446684, - "grad_norm": 2.4075546264648438, - "learning_rate": 3.39341808259222e-05, - "loss": 0.6662, - "step": 109040 - }, - { - "epoch": 0.9640375537049807, - "grad_norm": 2.7014997005462646, - "learning_rate": 3.393270743825032e-05, - "loss": 0.6357, - "step": 109050 - }, - { - "epoch": 0.9641259569652929, - "grad_norm": 4.416240692138672, - "learning_rate": 3.3931234050578456e-05, - "loss": 0.6237, - "step": 109060 - }, - { - "epoch": 0.9642143602256051, - "grad_norm": 9.041378021240234, - "learning_rate": 3.3929760662906584e-05, - "loss": 0.6293, - "step": 109070 - }, - { - "epoch": 0.9643027634859174, - "grad_norm": 3.652994155883789, - "learning_rate": 3.392828727523471e-05, - "loss": 0.647, - "step": 109080 - }, - { - "epoch": 0.9643911667462296, - "grad_norm": 4.848643779754639, - "learning_rate": 3.392681388756284e-05, - "loss": 0.648, - "step": 109090 - }, - { - "epoch": 0.9644795700065418, - "grad_norm": 1.7461626529693604, - "learning_rate": 3.3925340499890976e-05, - "loss": 0.6352, - "step": 109100 - }, - { - "epoch": 0.9645679732668541, - "grad_norm": 2.765460968017578, - "learning_rate": 3.39238671122191e-05, - "loss": 0.6671, - "step": 109110 - }, - { - "epoch": 0.9646563765271663, - "grad_norm": 2.8923518657684326, - "learning_rate": 3.392239372454723e-05, - "loss": 0.6095, - "step": 109120 - }, - { - "epoch": 0.9647447797874785, - "grad_norm": 2.7751667499542236, - "learning_rate": 3.392092033687536e-05, - "loss": 0.6744, - "step": 109130 - }, - { - "epoch": 0.9648331830477908, - "grad_norm": 3.464087963104248, - "learning_rate": 3.391944694920349e-05, - "loss": 0.7128, - "step": 109140 - }, - { - "epoch": 0.9649215863081031, - "grad_norm": 2.119079828262329, - "learning_rate": 3.391797356153162e-05, - "loss": 0.5623, - "step": 109150 - }, - { - "epoch": 0.9650099895684153, - "grad_norm": 1.3652396202087402, - "learning_rate": 3.3916500173859746e-05, - "loss": 0.7196, - "step": 109160 - }, - { - "epoch": 0.9650983928287276, - "grad_norm": 2.512056350708008, - "learning_rate": 3.3915026786187875e-05, - "loss": 0.7035, - "step": 109170 - }, - { - "epoch": 0.9651867960890398, - "grad_norm": 19.401247024536133, - "learning_rate": 3.391355339851601e-05, - "loss": 0.7211, - "step": 109180 - }, - { - "epoch": 0.965275199349352, - "grad_norm": 3.0264909267425537, - "learning_rate": 3.391208001084413e-05, - "loss": 0.5732, - "step": 109190 - }, - { - "epoch": 0.9653636026096643, - "grad_norm": 1.6094777584075928, - "learning_rate": 3.3910606623172266e-05, - "loss": 0.599, - "step": 109200 - }, - { - "epoch": 0.9654520058699765, - "grad_norm": 2.056447982788086, - "learning_rate": 3.3909133235500395e-05, - "loss": 0.6274, - "step": 109210 - }, - { - "epoch": 0.9655404091302887, - "grad_norm": 1.327606439590454, - "learning_rate": 3.390765984782852e-05, - "loss": 0.6552, - "step": 109220 - }, - { - "epoch": 0.9656288123906009, - "grad_norm": 2.1870946884155273, - "learning_rate": 3.390618646015665e-05, - "loss": 0.9189, - "step": 109230 - }, - { - "epoch": 0.9657172156509132, - "grad_norm": 4.863929748535156, - "learning_rate": 3.3904713072484787e-05, - "loss": 0.696, - "step": 109240 - }, - { - "epoch": 0.9658056189112254, - "grad_norm": 5.8540449142456055, - "learning_rate": 3.390323968481291e-05, - "loss": 0.6276, - "step": 109250 - }, - { - "epoch": 0.9658940221715376, - "grad_norm": 2.619868755340576, - "learning_rate": 3.390176629714104e-05, - "loss": 0.6666, - "step": 109260 - }, - { - "epoch": 0.96598242543185, - "grad_norm": 5.3761444091796875, - "learning_rate": 3.3900292909469165e-05, - "loss": 0.6263, - "step": 109270 - }, - { - "epoch": 0.9660708286921622, - "grad_norm": 3.354771852493286, - "learning_rate": 3.38988195217973e-05, - "loss": 0.6204, - "step": 109280 - }, - { - "epoch": 0.9661592319524744, - "grad_norm": 2.9355287551879883, - "learning_rate": 3.389734613412543e-05, - "loss": 0.5347, - "step": 109290 - }, - { - "epoch": 0.9662476352127867, - "grad_norm": 1.8563660383224487, - "learning_rate": 3.389587274645356e-05, - "loss": 0.6203, - "step": 109300 - }, - { - "epoch": 0.9663360384730989, - "grad_norm": 1.3764989376068115, - "learning_rate": 3.3894399358781685e-05, - "loss": 0.6076, - "step": 109310 - }, - { - "epoch": 0.9664244417334111, - "grad_norm": 1.4530961513519287, - "learning_rate": 3.389292597110982e-05, - "loss": 0.5924, - "step": 109320 - }, - { - "epoch": 0.9665128449937234, - "grad_norm": 7.1609578132629395, - "learning_rate": 3.389145258343794e-05, - "loss": 0.4917, - "step": 109330 - }, - { - "epoch": 0.9666012482540356, - "grad_norm": 2.37711238861084, - "learning_rate": 3.388997919576608e-05, - "loss": 0.6494, - "step": 109340 - }, - { - "epoch": 0.9666896515143478, - "grad_norm": 3.3385396003723145, - "learning_rate": 3.3888505808094205e-05, - "loss": 0.6843, - "step": 109350 - }, - { - "epoch": 0.9667780547746601, - "grad_norm": 4.974733829498291, - "learning_rate": 3.3887032420422334e-05, - "loss": 0.5208, - "step": 109360 - }, - { - "epoch": 0.9668664580349723, - "grad_norm": 3.60251522064209, - "learning_rate": 3.388555903275046e-05, - "loss": 0.6596, - "step": 109370 - }, - { - "epoch": 0.9669548612952845, - "grad_norm": 7.9344024658203125, - "learning_rate": 3.38840856450786e-05, - "loss": 0.655, - "step": 109380 - }, - { - "epoch": 0.9670432645555969, - "grad_norm": 0.9193863272666931, - "learning_rate": 3.388261225740672e-05, - "loss": 0.6953, - "step": 109390 - }, - { - "epoch": 0.9671316678159091, - "grad_norm": 1.1478573083877563, - "learning_rate": 3.3881138869734854e-05, - "loss": 0.6202, - "step": 109400 - }, - { - "epoch": 0.9672200710762213, - "grad_norm": 6.346615791320801, - "learning_rate": 3.3879665482062975e-05, - "loss": 0.7277, - "step": 109410 - }, - { - "epoch": 0.9673084743365336, - "grad_norm": 4.443350315093994, - "learning_rate": 3.387819209439111e-05, - "loss": 0.5476, - "step": 109420 - }, - { - "epoch": 0.9673968775968458, - "grad_norm": 2.1637489795684814, - "learning_rate": 3.387671870671924e-05, - "loss": 0.664, - "step": 109430 - }, - { - "epoch": 0.967485280857158, - "grad_norm": 1.067169189453125, - "learning_rate": 3.387524531904737e-05, - "loss": 0.6302, - "step": 109440 - }, - { - "epoch": 0.9675736841174702, - "grad_norm": 2.1617817878723145, - "learning_rate": 3.3873771931375496e-05, - "loss": 0.5546, - "step": 109450 - }, - { - "epoch": 0.9676620873777825, - "grad_norm": 3.400785446166992, - "learning_rate": 3.387229854370363e-05, - "loss": 0.6219, - "step": 109460 - }, - { - "epoch": 0.9677504906380947, - "grad_norm": 0.7347487211227417, - "learning_rate": 3.387082515603175e-05, - "loss": 0.5365, - "step": 109470 - }, - { - "epoch": 0.9678388938984069, - "grad_norm": 1.5539226531982422, - "learning_rate": 3.386935176835989e-05, - "loss": 0.5995, - "step": 109480 - }, - { - "epoch": 0.9679272971587192, - "grad_norm": 2.5931179523468018, - "learning_rate": 3.3867878380688016e-05, - "loss": 0.6951, - "step": 109490 - }, - { - "epoch": 0.9680157004190314, - "grad_norm": 2.577695608139038, - "learning_rate": 3.3866404993016144e-05, - "loss": 0.711, - "step": 109500 - }, - { - "epoch": 0.9681041036793437, - "grad_norm": 3.2594804763793945, - "learning_rate": 3.386493160534427e-05, - "loss": 0.7247, - "step": 109510 - }, - { - "epoch": 0.968192506939656, - "grad_norm": 3.4535417556762695, - "learning_rate": 3.38634582176724e-05, - "loss": 0.6625, - "step": 109520 - }, - { - "epoch": 0.9682809101999682, - "grad_norm": 2.1193764209747314, - "learning_rate": 3.386198483000053e-05, - "loss": 0.6772, - "step": 109530 - }, - { - "epoch": 0.9683693134602804, - "grad_norm": 1.722532033920288, - "learning_rate": 3.3860511442328664e-05, - "loss": 0.728, - "step": 109540 - }, - { - "epoch": 0.9684577167205927, - "grad_norm": 3.240015983581543, - "learning_rate": 3.3859038054656786e-05, - "loss": 0.5037, - "step": 109550 - }, - { - "epoch": 0.9685461199809049, - "grad_norm": 4.359414100646973, - "learning_rate": 3.385756466698492e-05, - "loss": 0.7401, - "step": 109560 - }, - { - "epoch": 0.9686345232412171, - "grad_norm": 1.9799870252609253, - "learning_rate": 3.385609127931305e-05, - "loss": 0.7385, - "step": 109570 - }, - { - "epoch": 0.9687229265015294, - "grad_norm": 3.4243040084838867, - "learning_rate": 3.385461789164118e-05, - "loss": 0.6147, - "step": 109580 - }, - { - "epoch": 0.9688113297618416, - "grad_norm": 1.2312356233596802, - "learning_rate": 3.3853144503969306e-05, - "loss": 0.71, - "step": 109590 - }, - { - "epoch": 0.9688997330221538, - "grad_norm": 1.8239474296569824, - "learning_rate": 3.385167111629744e-05, - "loss": 0.7736, - "step": 109600 - }, - { - "epoch": 0.968988136282466, - "grad_norm": 5.502866744995117, - "learning_rate": 3.385019772862556e-05, - "loss": 0.6725, - "step": 109610 - }, - { - "epoch": 0.9690765395427784, - "grad_norm": 1.8239455223083496, - "learning_rate": 3.38487243409537e-05, - "loss": 0.5721, - "step": 109620 - }, - { - "epoch": 0.9691649428030906, - "grad_norm": 6.233642101287842, - "learning_rate": 3.384725095328182e-05, - "loss": 0.5787, - "step": 109630 - }, - { - "epoch": 0.9692533460634029, - "grad_norm": 2.3092336654663086, - "learning_rate": 3.3845777565609955e-05, - "loss": 0.666, - "step": 109640 - }, - { - "epoch": 0.9693417493237151, - "grad_norm": 2.7663841247558594, - "learning_rate": 3.384430417793808e-05, - "loss": 0.7158, - "step": 109650 - }, - { - "epoch": 0.9694301525840273, - "grad_norm": 0.8950415253639221, - "learning_rate": 3.384283079026621e-05, - "loss": 0.6888, - "step": 109660 - }, - { - "epoch": 0.9695185558443395, - "grad_norm": 2.2749557495117188, - "learning_rate": 3.384135740259434e-05, - "loss": 0.6463, - "step": 109670 - }, - { - "epoch": 0.9696069591046518, - "grad_norm": 3.1506237983703613, - "learning_rate": 3.3839884014922475e-05, - "loss": 0.7202, - "step": 109680 - }, - { - "epoch": 0.969695362364964, - "grad_norm": 1.6731704473495483, - "learning_rate": 3.3838410627250596e-05, - "loss": 0.5419, - "step": 109690 - }, - { - "epoch": 0.9697837656252762, - "grad_norm": 7.018250942230225, - "learning_rate": 3.383693723957873e-05, - "loss": 0.706, - "step": 109700 - }, - { - "epoch": 0.9698721688855885, - "grad_norm": 4.473487854003906, - "learning_rate": 3.383546385190686e-05, - "loss": 0.6328, - "step": 109710 - }, - { - "epoch": 0.9699605721459007, - "grad_norm": 2.5180888175964355, - "learning_rate": 3.383399046423499e-05, - "loss": 0.7621, - "step": 109720 - }, - { - "epoch": 0.9700489754062129, - "grad_norm": 2.5558810234069824, - "learning_rate": 3.3832517076563117e-05, - "loss": 0.7036, - "step": 109730 - }, - { - "epoch": 0.9701373786665253, - "grad_norm": 3.7552618980407715, - "learning_rate": 3.3831043688891245e-05, - "loss": 0.6776, - "step": 109740 - }, - { - "epoch": 0.9702257819268375, - "grad_norm": 3.698937177658081, - "learning_rate": 3.382957030121937e-05, - "loss": 0.6538, - "step": 109750 - }, - { - "epoch": 0.9703141851871497, - "grad_norm": 3.056321859359741, - "learning_rate": 3.382809691354751e-05, - "loss": 0.6508, - "step": 109760 - }, - { - "epoch": 0.970402588447462, - "grad_norm": 4.742011070251465, - "learning_rate": 3.382662352587564e-05, - "loss": 0.5984, - "step": 109770 - }, - { - "epoch": 0.9704909917077742, - "grad_norm": 2.4124088287353516, - "learning_rate": 3.3825150138203765e-05, - "loss": 0.5993, - "step": 109780 - }, - { - "epoch": 0.9705793949680864, - "grad_norm": 2.9382693767547607, - "learning_rate": 3.3823676750531893e-05, - "loss": 0.5611, - "step": 109790 - }, - { - "epoch": 0.9706677982283987, - "grad_norm": 1.912672758102417, - "learning_rate": 3.382220336286002e-05, - "loss": 0.5495, - "step": 109800 - }, - { - "epoch": 0.9707562014887109, - "grad_norm": 1.6104891300201416, - "learning_rate": 3.382072997518815e-05, - "loss": 0.5484, - "step": 109810 - }, - { - "epoch": 0.9708446047490231, - "grad_norm": 13.708107948303223, - "learning_rate": 3.3819256587516285e-05, - "loss": 0.6513, - "step": 109820 - }, - { - "epoch": 0.9709330080093354, - "grad_norm": 2.5372703075408936, - "learning_rate": 3.3817783199844414e-05, - "loss": 0.6601, - "step": 109830 - }, - { - "epoch": 0.9710214112696476, - "grad_norm": 2.0031867027282715, - "learning_rate": 3.381630981217254e-05, - "loss": 0.6349, - "step": 109840 - }, - { - "epoch": 0.9711098145299598, - "grad_norm": 28.69565200805664, - "learning_rate": 3.381483642450067e-05, - "loss": 0.6368, - "step": 109850 - }, - { - "epoch": 0.9711982177902722, - "grad_norm": 2.5293679237365723, - "learning_rate": 3.38133630368288e-05, - "loss": 0.7287, - "step": 109860 - }, - { - "epoch": 0.9712866210505844, - "grad_norm": 5.0612945556640625, - "learning_rate": 3.381188964915693e-05, - "loss": 0.6839, - "step": 109870 - }, - { - "epoch": 0.9713750243108966, - "grad_norm": 10.466364860534668, - "learning_rate": 3.3810416261485055e-05, - "loss": 0.6798, - "step": 109880 - }, - { - "epoch": 0.9714634275712088, - "grad_norm": 1.5394682884216309, - "learning_rate": 3.380894287381319e-05, - "loss": 0.6237, - "step": 109890 - }, - { - "epoch": 0.9715518308315211, - "grad_norm": 1.3443679809570312, - "learning_rate": 3.380746948614132e-05, - "loss": 0.5754, - "step": 109900 - }, - { - "epoch": 0.9716402340918333, - "grad_norm": 12.633038520812988, - "learning_rate": 3.380599609846945e-05, - "loss": 0.7017, - "step": 109910 - }, - { - "epoch": 0.9717286373521455, - "grad_norm": 2.1645145416259766, - "learning_rate": 3.3804522710797576e-05, - "loss": 0.4924, - "step": 109920 - }, - { - "epoch": 0.9718170406124578, - "grad_norm": 5.534739971160889, - "learning_rate": 3.3803049323125704e-05, - "loss": 0.6194, - "step": 109930 - }, - { - "epoch": 0.97190544387277, - "grad_norm": 8.998230934143066, - "learning_rate": 3.380157593545383e-05, - "loss": 0.5662, - "step": 109940 - }, - { - "epoch": 0.9719938471330822, - "grad_norm": 3.581232786178589, - "learning_rate": 3.380010254778197e-05, - "loss": 0.9005, - "step": 109950 - }, - { - "epoch": 0.9720822503933945, - "grad_norm": 6.763643741607666, - "learning_rate": 3.3798629160110096e-05, - "loss": 0.685, - "step": 109960 - }, - { - "epoch": 0.9721706536537067, - "grad_norm": 6.694692134857178, - "learning_rate": 3.3797155772438224e-05, - "loss": 0.5331, - "step": 109970 - }, - { - "epoch": 0.972259056914019, - "grad_norm": 1.338887333869934, - "learning_rate": 3.379568238476635e-05, - "loss": 0.7075, - "step": 109980 - }, - { - "epoch": 0.9723474601743313, - "grad_norm": 1.2556545734405518, - "learning_rate": 3.379420899709448e-05, - "loss": 0.679, - "step": 109990 - }, - { - "epoch": 0.9724358634346435, - "grad_norm": 3.0473825931549072, - "learning_rate": 3.379273560942261e-05, - "loss": 0.6411, - "step": 110000 - }, - { - "epoch": 0.9725242666949557, - "grad_norm": 0.9802045822143555, - "learning_rate": 3.3791262221750744e-05, - "loss": 0.6186, - "step": 110010 - }, - { - "epoch": 0.972612669955268, - "grad_norm": 3.4868831634521484, - "learning_rate": 3.3789788834078866e-05, - "loss": 0.7297, - "step": 110020 - }, - { - "epoch": 0.9727010732155802, - "grad_norm": 5.8326826095581055, - "learning_rate": 3.3788315446407e-05, - "loss": 0.6502, - "step": 110030 - }, - { - "epoch": 0.9727894764758924, - "grad_norm": 1.454392910003662, - "learning_rate": 3.378684205873513e-05, - "loss": 0.5798, - "step": 110040 - }, - { - "epoch": 0.9728778797362047, - "grad_norm": 1.8410056829452515, - "learning_rate": 3.378536867106326e-05, - "loss": 0.7029, - "step": 110050 - }, - { - "epoch": 0.9729662829965169, - "grad_norm": 2.8436992168426514, - "learning_rate": 3.3783895283391386e-05, - "loss": 0.7533, - "step": 110060 - }, - { - "epoch": 0.9730546862568291, - "grad_norm": 1.9255040884017944, - "learning_rate": 3.378242189571952e-05, - "loss": 0.7105, - "step": 110070 - }, - { - "epoch": 0.9731430895171413, - "grad_norm": 3.409846544265747, - "learning_rate": 3.378094850804764e-05, - "loss": 0.5517, - "step": 110080 - }, - { - "epoch": 0.9732314927774536, - "grad_norm": 2.6061630249023438, - "learning_rate": 3.377947512037578e-05, - "loss": 0.5854, - "step": 110090 - }, - { - "epoch": 0.9733198960377659, - "grad_norm": 2.2798073291778564, - "learning_rate": 3.37780017327039e-05, - "loss": 0.6064, - "step": 110100 - }, - { - "epoch": 0.9734082992980782, - "grad_norm": 5.488577365875244, - "learning_rate": 3.3776528345032035e-05, - "loss": 0.5987, - "step": 110110 - }, - { - "epoch": 0.9734967025583904, - "grad_norm": 4.310491561889648, - "learning_rate": 3.377505495736016e-05, - "loss": 0.6977, - "step": 110120 - }, - { - "epoch": 0.9735851058187026, - "grad_norm": 1.363355040550232, - "learning_rate": 3.377358156968829e-05, - "loss": 0.6896, - "step": 110130 - }, - { - "epoch": 0.9736735090790148, - "grad_norm": 7.883826732635498, - "learning_rate": 3.377210818201642e-05, - "loss": 0.6362, - "step": 110140 - }, - { - "epoch": 0.9737619123393271, - "grad_norm": 2.7764811515808105, - "learning_rate": 3.3770634794344555e-05, - "loss": 0.6291, - "step": 110150 - }, - { - "epoch": 0.9738503155996393, - "grad_norm": 1.728932499885559, - "learning_rate": 3.3769161406672676e-05, - "loss": 0.6376, - "step": 110160 - }, - { - "epoch": 0.9739387188599515, - "grad_norm": 2.01326847076416, - "learning_rate": 3.376768801900081e-05, - "loss": 0.6448, - "step": 110170 - }, - { - "epoch": 0.9740271221202638, - "grad_norm": 2.6898386478424072, - "learning_rate": 3.376621463132894e-05, - "loss": 0.6115, - "step": 110180 - }, - { - "epoch": 0.974115525380576, - "grad_norm": 3.0468032360076904, - "learning_rate": 3.376474124365707e-05, - "loss": 0.6463, - "step": 110190 - }, - { - "epoch": 0.9742039286408882, - "grad_norm": 1.594526767730713, - "learning_rate": 3.37632678559852e-05, - "loss": 0.6194, - "step": 110200 - }, - { - "epoch": 0.9742923319012006, - "grad_norm": 3.7182111740112305, - "learning_rate": 3.3761794468313325e-05, - "loss": 0.6727, - "step": 110210 - }, - { - "epoch": 0.9743807351615128, - "grad_norm": 1.7522857189178467, - "learning_rate": 3.376032108064145e-05, - "loss": 0.786, - "step": 110220 - }, - { - "epoch": 0.974469138421825, - "grad_norm": 4.859210014343262, - "learning_rate": 3.375884769296959e-05, - "loss": 0.6115, - "step": 110230 - }, - { - "epoch": 0.9745575416821373, - "grad_norm": 1.91049325466156, - "learning_rate": 3.375737430529771e-05, - "loss": 0.5978, - "step": 110240 - }, - { - "epoch": 0.9746459449424495, - "grad_norm": 2.0873186588287354, - "learning_rate": 3.3755900917625845e-05, - "loss": 0.6399, - "step": 110250 - }, - { - "epoch": 0.9747343482027617, - "grad_norm": 2.0097036361694336, - "learning_rate": 3.3754427529953974e-05, - "loss": 0.702, - "step": 110260 - }, - { - "epoch": 0.974822751463074, - "grad_norm": 2.9090471267700195, - "learning_rate": 3.37529541422821e-05, - "loss": 0.5447, - "step": 110270 - }, - { - "epoch": 0.9749111547233862, - "grad_norm": 2.7814157009124756, - "learning_rate": 3.375148075461023e-05, - "loss": 0.6631, - "step": 110280 - }, - { - "epoch": 0.9749995579836984, - "grad_norm": 3.743830680847168, - "learning_rate": 3.3750007366938365e-05, - "loss": 0.6979, - "step": 110290 - }, - { - "epoch": 0.9750879612440106, - "grad_norm": 1.973961353302002, - "learning_rate": 3.374853397926649e-05, - "loss": 0.6843, - "step": 110300 - }, - { - "epoch": 0.9751763645043229, - "grad_norm": 0.7969472408294678, - "learning_rate": 3.374706059159462e-05, - "loss": 0.515, - "step": 110310 - }, - { - "epoch": 0.9752647677646351, - "grad_norm": 0.8885216116905212, - "learning_rate": 3.374558720392275e-05, - "loss": 0.5584, - "step": 110320 - }, - { - "epoch": 0.9753531710249475, - "grad_norm": 3.993535041809082, - "learning_rate": 3.374411381625088e-05, - "loss": 0.6791, - "step": 110330 - }, - { - "epoch": 0.9754415742852597, - "grad_norm": 2.560508966445923, - "learning_rate": 3.374264042857901e-05, - "loss": 0.5503, - "step": 110340 - }, - { - "epoch": 0.9755299775455719, - "grad_norm": 0.5963724255561829, - "learning_rate": 3.3741167040907136e-05, - "loss": 0.6366, - "step": 110350 - }, - { - "epoch": 0.9756183808058841, - "grad_norm": 1.0845046043395996, - "learning_rate": 3.3739693653235264e-05, - "loss": 0.6715, - "step": 110360 - }, - { - "epoch": 0.9757067840661964, - "grad_norm": 3.978794574737549, - "learning_rate": 3.37382202655634e-05, - "loss": 0.6431, - "step": 110370 - }, - { - "epoch": 0.9757951873265086, - "grad_norm": 6.08585786819458, - "learning_rate": 3.373674687789152e-05, - "loss": 0.6128, - "step": 110380 - }, - { - "epoch": 0.9758835905868208, - "grad_norm": 1.7601128816604614, - "learning_rate": 3.3735273490219656e-05, - "loss": 0.5619, - "step": 110390 - }, - { - "epoch": 0.9759719938471331, - "grad_norm": 2.1101343631744385, - "learning_rate": 3.3733800102547784e-05, - "loss": 0.761, - "step": 110400 - }, - { - "epoch": 0.9760603971074453, - "grad_norm": 2.4930419921875, - "learning_rate": 3.373232671487591e-05, - "loss": 0.6937, - "step": 110410 - }, - { - "epoch": 0.9761488003677575, - "grad_norm": 6.182460784912109, - "learning_rate": 3.373085332720404e-05, - "loss": 0.7751, - "step": 110420 - }, - { - "epoch": 0.9762372036280698, - "grad_norm": 3.218567371368408, - "learning_rate": 3.3729379939532176e-05, - "loss": 0.6546, - "step": 110430 - }, - { - "epoch": 0.976325606888382, - "grad_norm": 1.4082136154174805, - "learning_rate": 3.37279065518603e-05, - "loss": 0.5433, - "step": 110440 - }, - { - "epoch": 0.9764140101486943, - "grad_norm": 1.3678123950958252, - "learning_rate": 3.372643316418843e-05, - "loss": 0.6713, - "step": 110450 - }, - { - "epoch": 0.9765024134090066, - "grad_norm": 2.181298017501831, - "learning_rate": 3.3724959776516554e-05, - "loss": 0.6716, - "step": 110460 - }, - { - "epoch": 0.9765908166693188, - "grad_norm": 1.5102198123931885, - "learning_rate": 3.372348638884469e-05, - "loss": 0.7715, - "step": 110470 - }, - { - "epoch": 0.976679219929631, - "grad_norm": 4.327232360839844, - "learning_rate": 3.372201300117282e-05, - "loss": 0.6835, - "step": 110480 - }, - { - "epoch": 0.9767676231899433, - "grad_norm": 2.2418100833892822, - "learning_rate": 3.3720539613500946e-05, - "loss": 0.4445, - "step": 110490 - }, - { - "epoch": 0.9768560264502555, - "grad_norm": 1.5772608518600464, - "learning_rate": 3.3719066225829074e-05, - "loss": 0.5464, - "step": 110500 - }, - { - "epoch": 0.9769444297105677, - "grad_norm": 3.4416377544403076, - "learning_rate": 3.371759283815721e-05, - "loss": 0.5939, - "step": 110510 - }, - { - "epoch": 0.97703283297088, - "grad_norm": 4.3905768394470215, - "learning_rate": 3.371611945048533e-05, - "loss": 0.8269, - "step": 110520 - }, - { - "epoch": 0.9771212362311922, - "grad_norm": 11.387849807739258, - "learning_rate": 3.3714646062813466e-05, - "loss": 0.6816, - "step": 110530 - }, - { - "epoch": 0.9772096394915044, - "grad_norm": 1.5221426486968994, - "learning_rate": 3.3713172675141595e-05, - "loss": 0.6146, - "step": 110540 - }, - { - "epoch": 0.9772980427518166, - "grad_norm": 5.3909101486206055, - "learning_rate": 3.371169928746972e-05, - "loss": 0.6093, - "step": 110550 - }, - { - "epoch": 0.9773864460121289, - "grad_norm": 2.585848808288574, - "learning_rate": 3.371022589979785e-05, - "loss": 0.6838, - "step": 110560 - }, - { - "epoch": 0.9774748492724412, - "grad_norm": 2.381023406982422, - "learning_rate": 3.370875251212598e-05, - "loss": 0.8033, - "step": 110570 - }, - { - "epoch": 0.9775632525327534, - "grad_norm": 4.694600582122803, - "learning_rate": 3.370727912445411e-05, - "loss": 0.704, - "step": 110580 - }, - { - "epoch": 0.9776516557930657, - "grad_norm": 4.067041873931885, - "learning_rate": 3.370580573678224e-05, - "loss": 0.8415, - "step": 110590 - }, - { - "epoch": 0.9777400590533779, - "grad_norm": 4.064582824707031, - "learning_rate": 3.3704332349110365e-05, - "loss": 0.6408, - "step": 110600 - }, - { - "epoch": 0.9778284623136901, - "grad_norm": 1.6221612691879272, - "learning_rate": 3.37028589614385e-05, - "loss": 0.7663, - "step": 110610 - }, - { - "epoch": 0.9779168655740024, - "grad_norm": 5.154999256134033, - "learning_rate": 3.370138557376663e-05, - "loss": 0.7006, - "step": 110620 - }, - { - "epoch": 0.9780052688343146, - "grad_norm": 3.455639123916626, - "learning_rate": 3.3699912186094757e-05, - "loss": 0.5376, - "step": 110630 - }, - { - "epoch": 0.9780936720946268, - "grad_norm": 1.3004995584487915, - "learning_rate": 3.3698438798422885e-05, - "loss": 0.4797, - "step": 110640 - }, - { - "epoch": 0.9781820753549391, - "grad_norm": 6.261152744293213, - "learning_rate": 3.369696541075102e-05, - "loss": 0.6105, - "step": 110650 - }, - { - "epoch": 0.9782704786152513, - "grad_norm": 12.539459228515625, - "learning_rate": 3.369549202307914e-05, - "loss": 0.6637, - "step": 110660 - }, - { - "epoch": 0.9783588818755635, - "grad_norm": 7.47205114364624, - "learning_rate": 3.369401863540728e-05, - "loss": 0.6604, - "step": 110670 - }, - { - "epoch": 0.9784472851358759, - "grad_norm": 1.115787386894226, - "learning_rate": 3.3692545247735405e-05, - "loss": 0.6671, - "step": 110680 - }, - { - "epoch": 0.9785356883961881, - "grad_norm": 1.0838844776153564, - "learning_rate": 3.3691071860063533e-05, - "loss": 0.5285, - "step": 110690 - }, - { - "epoch": 0.9786240916565003, - "grad_norm": 4.756259441375732, - "learning_rate": 3.368959847239166e-05, - "loss": 0.6813, - "step": 110700 - }, - { - "epoch": 0.9787124949168126, - "grad_norm": 8.691108703613281, - "learning_rate": 3.368812508471979e-05, - "loss": 0.6325, - "step": 110710 - }, - { - "epoch": 0.9788008981771248, - "grad_norm": 2.5743041038513184, - "learning_rate": 3.368665169704792e-05, - "loss": 0.6698, - "step": 110720 - }, - { - "epoch": 0.978889301437437, - "grad_norm": 3.716695547103882, - "learning_rate": 3.3685178309376054e-05, - "loss": 0.6944, - "step": 110730 - }, - { - "epoch": 0.9789777046977493, - "grad_norm": 4.441263675689697, - "learning_rate": 3.368370492170418e-05, - "loss": 0.5897, - "step": 110740 - }, - { - "epoch": 0.9790661079580615, - "grad_norm": 3.7443573474884033, - "learning_rate": 3.368223153403231e-05, - "loss": 0.6148, - "step": 110750 - }, - { - "epoch": 0.9791545112183737, - "grad_norm": 7.481271266937256, - "learning_rate": 3.368075814636044e-05, - "loss": 0.5692, - "step": 110760 - }, - { - "epoch": 0.9792429144786859, - "grad_norm": 1.6937706470489502, - "learning_rate": 3.367928475868857e-05, - "loss": 0.6752, - "step": 110770 - }, - { - "epoch": 0.9793313177389982, - "grad_norm": 2.336862087249756, - "learning_rate": 3.3677811371016695e-05, - "loss": 0.5876, - "step": 110780 - }, - { - "epoch": 0.9794197209993104, - "grad_norm": 2.6145122051239014, - "learning_rate": 3.367633798334483e-05, - "loss": 0.7259, - "step": 110790 - }, - { - "epoch": 0.9795081242596227, - "grad_norm": 4.576858043670654, - "learning_rate": 3.367486459567296e-05, - "loss": 0.6567, - "step": 110800 - }, - { - "epoch": 0.979596527519935, - "grad_norm": 2.2098803520202637, - "learning_rate": 3.367339120800109e-05, - "loss": 0.6244, - "step": 110810 - }, - { - "epoch": 0.9796849307802472, - "grad_norm": 2.3720593452453613, - "learning_rate": 3.3671917820329216e-05, - "loss": 0.7029, - "step": 110820 - }, - { - "epoch": 0.9797733340405594, - "grad_norm": 1.2826426029205322, - "learning_rate": 3.3670444432657344e-05, - "loss": 0.6412, - "step": 110830 - }, - { - "epoch": 0.9798617373008717, - "grad_norm": 8.829693794250488, - "learning_rate": 3.366897104498547e-05, - "loss": 0.6462, - "step": 110840 - }, - { - "epoch": 0.9799501405611839, - "grad_norm": 4.648908615112305, - "learning_rate": 3.36674976573136e-05, - "loss": 0.5905, - "step": 110850 - }, - { - "epoch": 0.9800385438214961, - "grad_norm": 0.8421372175216675, - "learning_rate": 3.3666024269641736e-05, - "loss": 0.566, - "step": 110860 - }, - { - "epoch": 0.9801269470818084, - "grad_norm": 6.786984443664551, - "learning_rate": 3.3664550881969864e-05, - "loss": 0.6242, - "step": 110870 - }, - { - "epoch": 0.9802153503421206, - "grad_norm": 1.9286582469940186, - "learning_rate": 3.366307749429799e-05, - "loss": 0.6462, - "step": 110880 - }, - { - "epoch": 0.9803037536024328, - "grad_norm": 2.2931947708129883, - "learning_rate": 3.366160410662612e-05, - "loss": 0.584, - "step": 110890 - }, - { - "epoch": 0.9803921568627451, - "grad_norm": 1.8378829956054688, - "learning_rate": 3.366013071895425e-05, - "loss": 0.5361, - "step": 110900 - }, - { - "epoch": 0.9804805601230573, - "grad_norm": 4.251286029815674, - "learning_rate": 3.365865733128238e-05, - "loss": 0.574, - "step": 110910 - }, - { - "epoch": 0.9805689633833696, - "grad_norm": 2.6717238426208496, - "learning_rate": 3.365718394361051e-05, - "loss": 0.6065, - "step": 110920 - }, - { - "epoch": 0.9806573666436819, - "grad_norm": 1.7513400316238403, - "learning_rate": 3.3655710555938634e-05, - "loss": 0.697, - "step": 110930 - }, - { - "epoch": 0.9807457699039941, - "grad_norm": 1.9559534788131714, - "learning_rate": 3.365423716826677e-05, - "loss": 0.6521, - "step": 110940 - }, - { - "epoch": 0.9808341731643063, - "grad_norm": 5.948936939239502, - "learning_rate": 3.36527637805949e-05, - "loss": 0.6283, - "step": 110950 - }, - { - "epoch": 0.9809225764246186, - "grad_norm": 1.768458604812622, - "learning_rate": 3.3651290392923026e-05, - "loss": 0.6137, - "step": 110960 - }, - { - "epoch": 0.9810109796849308, - "grad_norm": 2.430455207824707, - "learning_rate": 3.3649817005251154e-05, - "loss": 0.5641, - "step": 110970 - }, - { - "epoch": 0.981099382945243, - "grad_norm": 7.473715305328369, - "learning_rate": 3.364834361757929e-05, - "loss": 0.6087, - "step": 110980 - }, - { - "epoch": 0.9811877862055552, - "grad_norm": 7.1489338874816895, - "learning_rate": 3.364687022990741e-05, - "loss": 0.592, - "step": 110990 - }, - { - "epoch": 0.9812761894658675, - "grad_norm": 3.24849271774292, - "learning_rate": 3.3645396842235546e-05, - "loss": 0.7402, - "step": 111000 - }, - { - "epoch": 0.9813645927261797, - "grad_norm": 2.569237470626831, - "learning_rate": 3.3643923454563675e-05, - "loss": 0.6987, - "step": 111010 - }, - { - "epoch": 0.9814529959864919, - "grad_norm": 2.116116523742676, - "learning_rate": 3.36424500668918e-05, - "loss": 0.767, - "step": 111020 - }, - { - "epoch": 0.9815413992468042, - "grad_norm": 2.550372362136841, - "learning_rate": 3.364097667921993e-05, - "loss": 0.5893, - "step": 111030 - }, - { - "epoch": 0.9816298025071165, - "grad_norm": 1.1438673734664917, - "learning_rate": 3.363950329154806e-05, - "loss": 0.6422, - "step": 111040 - }, - { - "epoch": 0.9817182057674287, - "grad_norm": 3.763845920562744, - "learning_rate": 3.363802990387619e-05, - "loss": 0.6291, - "step": 111050 - }, - { - "epoch": 0.981806609027741, - "grad_norm": 3.5108656883239746, - "learning_rate": 3.363655651620432e-05, - "loss": 0.7266, - "step": 111060 - }, - { - "epoch": 0.9818950122880532, - "grad_norm": 4.089592933654785, - "learning_rate": 3.3635083128532445e-05, - "loss": 0.6194, - "step": 111070 - }, - { - "epoch": 0.9819834155483654, - "grad_norm": 10.053905487060547, - "learning_rate": 3.363360974086058e-05, - "loss": 0.6434, - "step": 111080 - }, - { - "epoch": 0.9820718188086777, - "grad_norm": 2.0097062587738037, - "learning_rate": 3.363213635318871e-05, - "loss": 0.6876, - "step": 111090 - }, - { - "epoch": 0.9821602220689899, - "grad_norm": 2.302529811859131, - "learning_rate": 3.3630662965516837e-05, - "loss": 0.7691, - "step": 111100 - }, - { - "epoch": 0.9822486253293021, - "grad_norm": 15.456282615661621, - "learning_rate": 3.3629189577844965e-05, - "loss": 0.477, - "step": 111110 - }, - { - "epoch": 0.9823370285896144, - "grad_norm": 3.199341058731079, - "learning_rate": 3.36277161901731e-05, - "loss": 0.6802, - "step": 111120 - }, - { - "epoch": 0.9824254318499266, - "grad_norm": 2.484121799468994, - "learning_rate": 3.362624280250122e-05, - "loss": 0.6968, - "step": 111130 - }, - { - "epoch": 0.9825138351102388, - "grad_norm": 3.0543062686920166, - "learning_rate": 3.362476941482936e-05, - "loss": 0.599, - "step": 111140 - }, - { - "epoch": 0.982602238370551, - "grad_norm": 6.8025360107421875, - "learning_rate": 3.3623296027157485e-05, - "loss": 0.6477, - "step": 111150 - }, - { - "epoch": 0.9826906416308634, - "grad_norm": 4.920258045196533, - "learning_rate": 3.3621822639485613e-05, - "loss": 0.7036, - "step": 111160 - }, - { - "epoch": 0.9827790448911756, - "grad_norm": 6.934729099273682, - "learning_rate": 3.362034925181374e-05, - "loss": 0.6157, - "step": 111170 - }, - { - "epoch": 0.9828674481514879, - "grad_norm": 1.4260332584381104, - "learning_rate": 3.361887586414187e-05, - "loss": 0.5892, - "step": 111180 - }, - { - "epoch": 0.9829558514118001, - "grad_norm": 7.394562721252441, - "learning_rate": 3.361740247647e-05, - "loss": 0.6313, - "step": 111190 - }, - { - "epoch": 0.9830442546721123, - "grad_norm": 2.8978395462036133, - "learning_rate": 3.3615929088798134e-05, - "loss": 0.6461, - "step": 111200 - }, - { - "epoch": 0.9831326579324245, - "grad_norm": 5.9667277336120605, - "learning_rate": 3.3614455701126255e-05, - "loss": 0.6584, - "step": 111210 - }, - { - "epoch": 0.9832210611927368, - "grad_norm": 3.695119619369507, - "learning_rate": 3.361298231345439e-05, - "loss": 0.6886, - "step": 111220 - }, - { - "epoch": 0.983309464453049, - "grad_norm": 2.3961410522460938, - "learning_rate": 3.361150892578252e-05, - "loss": 0.6933, - "step": 111230 - }, - { - "epoch": 0.9833978677133612, - "grad_norm": 2.9586892127990723, - "learning_rate": 3.361003553811065e-05, - "loss": 0.678, - "step": 111240 - }, - { - "epoch": 0.9834862709736735, - "grad_norm": 3.3400754928588867, - "learning_rate": 3.3608562150438775e-05, - "loss": 0.7299, - "step": 111250 - }, - { - "epoch": 0.9835746742339857, - "grad_norm": 2.155742883682251, - "learning_rate": 3.360708876276691e-05, - "loss": 0.6606, - "step": 111260 - }, - { - "epoch": 0.983663077494298, - "grad_norm": 1.2182674407958984, - "learning_rate": 3.360561537509503e-05, - "loss": 0.5869, - "step": 111270 - }, - { - "epoch": 0.9837514807546103, - "grad_norm": 1.6962968111038208, - "learning_rate": 3.360414198742317e-05, - "loss": 0.5899, - "step": 111280 - }, - { - "epoch": 0.9838398840149225, - "grad_norm": 5.159481048583984, - "learning_rate": 3.360266859975129e-05, - "loss": 0.5996, - "step": 111290 - }, - { - "epoch": 0.9839282872752347, - "grad_norm": 4.989597797393799, - "learning_rate": 3.3601195212079424e-05, - "loss": 0.657, - "step": 111300 - }, - { - "epoch": 0.984016690535547, - "grad_norm": 1.6381126642227173, - "learning_rate": 3.359972182440755e-05, - "loss": 0.5976, - "step": 111310 - }, - { - "epoch": 0.9841050937958592, - "grad_norm": 4.167975902557373, - "learning_rate": 3.359824843673568e-05, - "loss": 0.6252, - "step": 111320 - }, - { - "epoch": 0.9841934970561714, - "grad_norm": 7.51429557800293, - "learning_rate": 3.359677504906381e-05, - "loss": 0.6074, - "step": 111330 - }, - { - "epoch": 0.9842819003164837, - "grad_norm": 1.646257758140564, - "learning_rate": 3.3595301661391944e-05, - "loss": 0.6902, - "step": 111340 - }, - { - "epoch": 0.9843703035767959, - "grad_norm": 3.257969617843628, - "learning_rate": 3.3593828273720066e-05, - "loss": 0.6878, - "step": 111350 - }, - { - "epoch": 0.9844587068371081, - "grad_norm": 4.001593589782715, - "learning_rate": 3.35923548860482e-05, - "loss": 0.6537, - "step": 111360 - }, - { - "epoch": 0.9845471100974204, - "grad_norm": 3.3365602493286133, - "learning_rate": 3.359088149837633e-05, - "loss": 0.6407, - "step": 111370 - }, - { - "epoch": 0.9846355133577326, - "grad_norm": 3.926586389541626, - "learning_rate": 3.358940811070446e-05, - "loss": 0.6118, - "step": 111380 - }, - { - "epoch": 0.9847239166180449, - "grad_norm": 2.5958337783813477, - "learning_rate": 3.3587934723032586e-05, - "loss": 0.6081, - "step": 111390 - }, - { - "epoch": 0.9848123198783572, - "grad_norm": 4.597136974334717, - "learning_rate": 3.3586461335360714e-05, - "loss": 0.6398, - "step": 111400 - }, - { - "epoch": 0.9849007231386694, - "grad_norm": 2.380460262298584, - "learning_rate": 3.358498794768884e-05, - "loss": 0.5416, - "step": 111410 - }, - { - "epoch": 0.9849891263989816, - "grad_norm": 6.699055194854736, - "learning_rate": 3.358351456001698e-05, - "loss": 0.8373, - "step": 111420 - }, - { - "epoch": 0.9850775296592938, - "grad_norm": 2.34159779548645, - "learning_rate": 3.35820411723451e-05, - "loss": 0.5761, - "step": 111430 - }, - { - "epoch": 0.9851659329196061, - "grad_norm": 2.4140844345092773, - "learning_rate": 3.3580567784673235e-05, - "loss": 0.6197, - "step": 111440 - }, - { - "epoch": 0.9852543361799183, - "grad_norm": 1.828371286392212, - "learning_rate": 3.357909439700136e-05, - "loss": 0.5182, - "step": 111450 - }, - { - "epoch": 0.9853427394402305, - "grad_norm": 7.179087162017822, - "learning_rate": 3.357762100932949e-05, - "loss": 0.7365, - "step": 111460 - }, - { - "epoch": 0.9854311427005428, - "grad_norm": 5.928529262542725, - "learning_rate": 3.357614762165762e-05, - "loss": 0.7532, - "step": 111470 - }, - { - "epoch": 0.985519545960855, - "grad_norm": 8.555516242980957, - "learning_rate": 3.3574674233985755e-05, - "loss": 0.7208, - "step": 111480 - }, - { - "epoch": 0.9856079492211672, - "grad_norm": 1.555471420288086, - "learning_rate": 3.3573200846313876e-05, - "loss": 0.7237, - "step": 111490 - }, - { - "epoch": 0.9856963524814795, - "grad_norm": 2.138625144958496, - "learning_rate": 3.357172745864201e-05, - "loss": 0.6548, - "step": 111500 - }, - { - "epoch": 0.9857847557417918, - "grad_norm": 1.7726247310638428, - "learning_rate": 3.357025407097013e-05, - "loss": 0.5777, - "step": 111510 - }, - { - "epoch": 0.985873159002104, - "grad_norm": 2.086958885192871, - "learning_rate": 3.356878068329827e-05, - "loss": 0.6493, - "step": 111520 - }, - { - "epoch": 0.9859615622624163, - "grad_norm": 11.418235778808594, - "learning_rate": 3.3567307295626396e-05, - "loss": 0.6205, - "step": 111530 - }, - { - "epoch": 0.9860499655227285, - "grad_norm": 1.4123930931091309, - "learning_rate": 3.3565833907954525e-05, - "loss": 0.63, - "step": 111540 - }, - { - "epoch": 0.9861383687830407, - "grad_norm": 2.378354787826538, - "learning_rate": 3.356436052028265e-05, - "loss": 0.5865, - "step": 111550 - }, - { - "epoch": 0.986226772043353, - "grad_norm": 3.8460135459899902, - "learning_rate": 3.356288713261079e-05, - "loss": 0.5692, - "step": 111560 - }, - { - "epoch": 0.9863151753036652, - "grad_norm": 4.886835098266602, - "learning_rate": 3.356141374493891e-05, - "loss": 0.5166, - "step": 111570 - }, - { - "epoch": 0.9864035785639774, - "grad_norm": 3.513688564300537, - "learning_rate": 3.3559940357267045e-05, - "loss": 0.6542, - "step": 111580 - }, - { - "epoch": 0.9864919818242897, - "grad_norm": 1.3782950639724731, - "learning_rate": 3.355846696959517e-05, - "loss": 0.6315, - "step": 111590 - }, - { - "epoch": 0.9865803850846019, - "grad_norm": 3.303790807723999, - "learning_rate": 3.35569935819233e-05, - "loss": 0.593, - "step": 111600 - }, - { - "epoch": 0.9866687883449141, - "grad_norm": 4.353058338165283, - "learning_rate": 3.355552019425143e-05, - "loss": 0.7096, - "step": 111610 - }, - { - "epoch": 0.9867571916052263, - "grad_norm": 1.3499033451080322, - "learning_rate": 3.3554046806579565e-05, - "loss": 0.7414, - "step": 111620 - }, - { - "epoch": 0.9868455948655387, - "grad_norm": 1.2435297966003418, - "learning_rate": 3.355257341890769e-05, - "loss": 0.5722, - "step": 111630 - }, - { - "epoch": 0.9869339981258509, - "grad_norm": 9.246455192565918, - "learning_rate": 3.355110003123582e-05, - "loss": 0.6394, - "step": 111640 - }, - { - "epoch": 0.9870224013861632, - "grad_norm": 1.8854089975357056, - "learning_rate": 3.354962664356395e-05, - "loss": 0.5993, - "step": 111650 - }, - { - "epoch": 0.9871108046464754, - "grad_norm": 1.8141562938690186, - "learning_rate": 3.354815325589208e-05, - "loss": 0.7156, - "step": 111660 - }, - { - "epoch": 0.9871992079067876, - "grad_norm": 3.7588021755218506, - "learning_rate": 3.354667986822021e-05, - "loss": 0.5912, - "step": 111670 - }, - { - "epoch": 0.9872876111670998, - "grad_norm": 2.8974814414978027, - "learning_rate": 3.3545206480548335e-05, - "loss": 0.6617, - "step": 111680 - }, - { - "epoch": 0.9873760144274121, - "grad_norm": 4.168397903442383, - "learning_rate": 3.3543733092876464e-05, - "loss": 0.615, - "step": 111690 - }, - { - "epoch": 0.9874644176877243, - "grad_norm": 2.7471868991851807, - "learning_rate": 3.35422597052046e-05, - "loss": 0.6021, - "step": 111700 - }, - { - "epoch": 0.9875528209480365, - "grad_norm": 1.2697445154190063, - "learning_rate": 3.354078631753273e-05, - "loss": 0.7222, - "step": 111710 - }, - { - "epoch": 0.9876412242083488, - "grad_norm": 0.9256862998008728, - "learning_rate": 3.3539312929860856e-05, - "loss": 0.6113, - "step": 111720 - }, - { - "epoch": 0.987729627468661, - "grad_norm": 3.108173131942749, - "learning_rate": 3.3537839542188984e-05, - "loss": 0.5417, - "step": 111730 - }, - { - "epoch": 0.9878180307289733, - "grad_norm": 2.847672939300537, - "learning_rate": 3.353636615451711e-05, - "loss": 0.7285, - "step": 111740 - }, - { - "epoch": 0.9879064339892856, - "grad_norm": 2.249431848526001, - "learning_rate": 3.353489276684524e-05, - "loss": 0.6328, - "step": 111750 - }, - { - "epoch": 0.9879948372495978, - "grad_norm": 2.3861143589019775, - "learning_rate": 3.353341937917337e-05, - "loss": 0.7842, - "step": 111760 - }, - { - "epoch": 0.98808324050991, - "grad_norm": 1.4964494705200195, - "learning_rate": 3.3531945991501504e-05, - "loss": 0.6366, - "step": 111770 - }, - { - "epoch": 0.9881716437702223, - "grad_norm": 2.032001256942749, - "learning_rate": 3.353047260382963e-05, - "loss": 0.5816, - "step": 111780 - }, - { - "epoch": 0.9882600470305345, - "grad_norm": 1.4544965028762817, - "learning_rate": 3.352899921615776e-05, - "loss": 0.6942, - "step": 111790 - }, - { - "epoch": 0.9883484502908467, - "grad_norm": 2.392289638519287, - "learning_rate": 3.352752582848589e-05, - "loss": 0.6744, - "step": 111800 - }, - { - "epoch": 0.988436853551159, - "grad_norm": 2.157137155532837, - "learning_rate": 3.352605244081402e-05, - "loss": 0.5898, - "step": 111810 - }, - { - "epoch": 0.9885252568114712, - "grad_norm": 4.992175102233887, - "learning_rate": 3.3524579053142146e-05, - "loss": 0.5477, - "step": 111820 - }, - { - "epoch": 0.9886136600717834, - "grad_norm": 7.483372211456299, - "learning_rate": 3.352310566547028e-05, - "loss": 0.5999, - "step": 111830 - }, - { - "epoch": 0.9887020633320956, - "grad_norm": 4.890076637268066, - "learning_rate": 3.352163227779841e-05, - "loss": 0.6696, - "step": 111840 - }, - { - "epoch": 0.9887904665924079, - "grad_norm": 13.550922393798828, - "learning_rate": 3.352015889012654e-05, - "loss": 0.6865, - "step": 111850 - }, - { - "epoch": 0.9888788698527202, - "grad_norm": 1.2540837526321411, - "learning_rate": 3.3518685502454666e-05, - "loss": 0.6883, - "step": 111860 - }, - { - "epoch": 0.9889672731130325, - "grad_norm": 2.1965646743774414, - "learning_rate": 3.3517212114782794e-05, - "loss": 0.7527, - "step": 111870 - }, - { - "epoch": 0.9890556763733447, - "grad_norm": 1.8193001747131348, - "learning_rate": 3.351573872711092e-05, - "loss": 0.6358, - "step": 111880 - }, - { - "epoch": 0.9891440796336569, - "grad_norm": 1.7222371101379395, - "learning_rate": 3.351426533943906e-05, - "loss": 0.6972, - "step": 111890 - }, - { - "epoch": 0.9892324828939691, - "grad_norm": 3.522794723510742, - "learning_rate": 3.351279195176718e-05, - "loss": 0.5687, - "step": 111900 - }, - { - "epoch": 0.9893208861542814, - "grad_norm": 3.7774064540863037, - "learning_rate": 3.3511318564095315e-05, - "loss": 0.615, - "step": 111910 - }, - { - "epoch": 0.9894092894145936, - "grad_norm": 0.9122946262359619, - "learning_rate": 3.350984517642344e-05, - "loss": 0.5144, - "step": 111920 - }, - { - "epoch": 0.9894976926749058, - "grad_norm": 2.9657299518585205, - "learning_rate": 3.350837178875157e-05, - "loss": 0.6468, - "step": 111930 - }, - { - "epoch": 0.9895860959352181, - "grad_norm": 4.893675804138184, - "learning_rate": 3.35068984010797e-05, - "loss": 0.5725, - "step": 111940 - }, - { - "epoch": 0.9896744991955303, - "grad_norm": 1.4844152927398682, - "learning_rate": 3.3505425013407835e-05, - "loss": 0.732, - "step": 111950 - }, - { - "epoch": 0.9897629024558425, - "grad_norm": 5.409655570983887, - "learning_rate": 3.3503951625735956e-05, - "loss": 0.6168, - "step": 111960 - }, - { - "epoch": 0.9898513057161548, - "grad_norm": 1.2928420305252075, - "learning_rate": 3.350247823806409e-05, - "loss": 0.6159, - "step": 111970 - }, - { - "epoch": 0.9899397089764671, - "grad_norm": 2.484102725982666, - "learning_rate": 3.350100485039221e-05, - "loss": 0.7712, - "step": 111980 - }, - { - "epoch": 0.9900281122367793, - "grad_norm": 2.1543467044830322, - "learning_rate": 3.349953146272035e-05, - "loss": 0.5778, - "step": 111990 - }, - { - "epoch": 0.9901165154970916, - "grad_norm": 1.3941656351089478, - "learning_rate": 3.3498058075048477e-05, - "loss": 0.7212, - "step": 112000 - }, - { - "epoch": 0.9902049187574038, - "grad_norm": 10.229924201965332, - "learning_rate": 3.3496584687376605e-05, - "loss": 0.7138, - "step": 112010 - }, - { - "epoch": 0.990293322017716, - "grad_norm": 14.385763168334961, - "learning_rate": 3.349511129970473e-05, - "loss": 0.5272, - "step": 112020 - }, - { - "epoch": 0.9903817252780283, - "grad_norm": 3.4601848125457764, - "learning_rate": 3.349363791203287e-05, - "loss": 0.6389, - "step": 112030 - }, - { - "epoch": 0.9904701285383405, - "grad_norm": 13.993854522705078, - "learning_rate": 3.349216452436099e-05, - "loss": 0.5967, - "step": 112040 - }, - { - "epoch": 0.9905585317986527, - "grad_norm": 4.1742119789123535, - "learning_rate": 3.3490691136689125e-05, - "loss": 0.589, - "step": 112050 - }, - { - "epoch": 0.990646935058965, - "grad_norm": 2.8001863956451416, - "learning_rate": 3.3489217749017253e-05, - "loss": 0.6774, - "step": 112060 - }, - { - "epoch": 0.9907353383192772, - "grad_norm": 4.187798023223877, - "learning_rate": 3.348774436134538e-05, - "loss": 0.6014, - "step": 112070 - }, - { - "epoch": 0.9908237415795894, - "grad_norm": 9.613956451416016, - "learning_rate": 3.348627097367351e-05, - "loss": 0.7411, - "step": 112080 - }, - { - "epoch": 0.9909121448399016, - "grad_norm": 2.667699098587036, - "learning_rate": 3.3484797586001645e-05, - "loss": 0.565, - "step": 112090 - }, - { - "epoch": 0.991000548100214, - "grad_norm": 11.713051795959473, - "learning_rate": 3.348332419832977e-05, - "loss": 0.6704, - "step": 112100 - }, - { - "epoch": 0.9910889513605262, - "grad_norm": 4.876836776733398, - "learning_rate": 3.34818508106579e-05, - "loss": 0.658, - "step": 112110 - }, - { - "epoch": 0.9911773546208384, - "grad_norm": 2.1874516010284424, - "learning_rate": 3.3480377422986024e-05, - "loss": 0.509, - "step": 112120 - }, - { - "epoch": 0.9912657578811507, - "grad_norm": 2.001737117767334, - "learning_rate": 3.347890403531416e-05, - "loss": 0.6167, - "step": 112130 - }, - { - "epoch": 0.9913541611414629, - "grad_norm": 2.1845858097076416, - "learning_rate": 3.347743064764229e-05, - "loss": 0.5533, - "step": 112140 - }, - { - "epoch": 0.9914425644017751, - "grad_norm": 1.0823453664779663, - "learning_rate": 3.3475957259970415e-05, - "loss": 0.5117, - "step": 112150 - }, - { - "epoch": 0.9915309676620874, - "grad_norm": 4.162007808685303, - "learning_rate": 3.3474483872298544e-05, - "loss": 0.7083, - "step": 112160 - }, - { - "epoch": 0.9916193709223996, - "grad_norm": 1.4400945901870728, - "learning_rate": 3.347301048462668e-05, - "loss": 0.7027, - "step": 112170 - }, - { - "epoch": 0.9917077741827118, - "grad_norm": 0.8610682487487793, - "learning_rate": 3.34715370969548e-05, - "loss": 0.6419, - "step": 112180 - }, - { - "epoch": 0.9917961774430241, - "grad_norm": 2.548790454864502, - "learning_rate": 3.3470063709282936e-05, - "loss": 0.6447, - "step": 112190 - }, - { - "epoch": 0.9918845807033363, - "grad_norm": 1.4812498092651367, - "learning_rate": 3.3468590321611064e-05, - "loss": 0.6844, - "step": 112200 - }, - { - "epoch": 0.9919729839636485, - "grad_norm": 3.9896907806396484, - "learning_rate": 3.346711693393919e-05, - "loss": 0.5442, - "step": 112210 - }, - { - "epoch": 0.9920613872239609, - "grad_norm": 3.034356117248535, - "learning_rate": 3.346564354626732e-05, - "loss": 0.5341, - "step": 112220 - }, - { - "epoch": 0.9921497904842731, - "grad_norm": 1.6086528301239014, - "learning_rate": 3.346417015859545e-05, - "loss": 0.5476, - "step": 112230 - }, - { - "epoch": 0.9922381937445853, - "grad_norm": 2.423977851867676, - "learning_rate": 3.346269677092358e-05, - "loss": 0.663, - "step": 112240 - }, - { - "epoch": 0.9923265970048976, - "grad_norm": 1.9531772136688232, - "learning_rate": 3.346122338325171e-05, - "loss": 0.6186, - "step": 112250 - }, - { - "epoch": 0.9924150002652098, - "grad_norm": 1.5535861253738403, - "learning_rate": 3.3459749995579834e-05, - "loss": 0.6499, - "step": 112260 - }, - { - "epoch": 0.992503403525522, - "grad_norm": 5.8906378746032715, - "learning_rate": 3.345827660790797e-05, - "loss": 0.7592, - "step": 112270 - }, - { - "epoch": 0.9925918067858343, - "grad_norm": 3.8838255405426025, - "learning_rate": 3.34568032202361e-05, - "loss": 0.6756, - "step": 112280 - }, - { - "epoch": 0.9926802100461465, - "grad_norm": 3.5611259937286377, - "learning_rate": 3.3455329832564226e-05, - "loss": 0.5665, - "step": 112290 - }, - { - "epoch": 0.9927686133064587, - "grad_norm": 4.166460990905762, - "learning_rate": 3.3453856444892354e-05, - "loss": 0.6722, - "step": 112300 - }, - { - "epoch": 0.992857016566771, - "grad_norm": 2.573184013366699, - "learning_rate": 3.345238305722049e-05, - "loss": 0.6226, - "step": 112310 - }, - { - "epoch": 0.9929454198270832, - "grad_norm": 3.364985704421997, - "learning_rate": 3.345090966954861e-05, - "loss": 0.6554, - "step": 112320 - }, - { - "epoch": 0.9930338230873955, - "grad_norm": 1.81927490234375, - "learning_rate": 3.3449436281876746e-05, - "loss": 0.5596, - "step": 112330 - }, - { - "epoch": 0.9931222263477077, - "grad_norm": 3.421112060546875, - "learning_rate": 3.344796289420487e-05, - "loss": 0.5602, - "step": 112340 - }, - { - "epoch": 0.99321062960802, - "grad_norm": 2.461779832839966, - "learning_rate": 3.3446489506533e-05, - "loss": 0.6946, - "step": 112350 - }, - { - "epoch": 0.9932990328683322, - "grad_norm": 2.894778251647949, - "learning_rate": 3.344501611886113e-05, - "loss": 0.7578, - "step": 112360 - }, - { - "epoch": 0.9933874361286444, - "grad_norm": 5.531325817108154, - "learning_rate": 3.344354273118926e-05, - "loss": 0.7036, - "step": 112370 - }, - { - "epoch": 0.9934758393889567, - "grad_norm": 8.579312324523926, - "learning_rate": 3.344206934351739e-05, - "loss": 0.6394, - "step": 112380 - }, - { - "epoch": 0.9935642426492689, - "grad_norm": 5.952613830566406, - "learning_rate": 3.344059595584552e-05, - "loss": 0.6792, - "step": 112390 - }, - { - "epoch": 0.9936526459095811, - "grad_norm": 1.9628828763961792, - "learning_rate": 3.3439122568173645e-05, - "loss": 0.5413, - "step": 112400 - }, - { - "epoch": 0.9937410491698934, - "grad_norm": 2.6676266193389893, - "learning_rate": 3.343764918050178e-05, - "loss": 0.6368, - "step": 112410 - }, - { - "epoch": 0.9938294524302056, - "grad_norm": 0.967044472694397, - "learning_rate": 3.343617579282991e-05, - "loss": 0.6824, - "step": 112420 - }, - { - "epoch": 0.9939178556905178, - "grad_norm": 5.3108439445495605, - "learning_rate": 3.3434702405158036e-05, - "loss": 0.7213, - "step": 112430 - }, - { - "epoch": 0.9940062589508301, - "grad_norm": 2.7890877723693848, - "learning_rate": 3.3433229017486165e-05, - "loss": 0.6702, - "step": 112440 - }, - { - "epoch": 0.9940946622111424, - "grad_norm": 1.9422330856323242, - "learning_rate": 3.343175562981429e-05, - "loss": 0.6479, - "step": 112450 - }, - { - "epoch": 0.9941830654714546, - "grad_norm": 3.3416390419006348, - "learning_rate": 3.343028224214242e-05, - "loss": 0.5992, - "step": 112460 - }, - { - "epoch": 0.9942714687317669, - "grad_norm": 1.1402921676635742, - "learning_rate": 3.3428808854470557e-05, - "loss": 0.6883, - "step": 112470 - }, - { - "epoch": 0.9943598719920791, - "grad_norm": 2.2184958457946777, - "learning_rate": 3.342733546679868e-05, - "loss": 0.6027, - "step": 112480 - }, - { - "epoch": 0.9944482752523913, - "grad_norm": 4.096175670623779, - "learning_rate": 3.342586207912681e-05, - "loss": 0.6861, - "step": 112490 - }, - { - "epoch": 0.9945366785127036, - "grad_norm": 3.490675687789917, - "learning_rate": 3.342438869145494e-05, - "loss": 0.6124, - "step": 112500 - }, - { - "epoch": 0.9946250817730158, - "grad_norm": 1.02232825756073, - "learning_rate": 3.342291530378307e-05, - "loss": 0.6064, - "step": 112510 - }, - { - "epoch": 0.994713485033328, - "grad_norm": 3.6563689708709717, - "learning_rate": 3.34214419161112e-05, - "loss": 0.6643, - "step": 112520 - }, - { - "epoch": 0.9948018882936402, - "grad_norm": 10.541913986206055, - "learning_rate": 3.3419968528439334e-05, - "loss": 0.5836, - "step": 112530 - }, - { - "epoch": 0.9948902915539525, - "grad_norm": 4.165454864501953, - "learning_rate": 3.3418495140767455e-05, - "loss": 0.7104, - "step": 112540 - }, - { - "epoch": 0.9949786948142647, - "grad_norm": 5.17685604095459, - "learning_rate": 3.341702175309559e-05, - "loss": 0.5705, - "step": 112550 - }, - { - "epoch": 0.9950670980745769, - "grad_norm": 6.6226630210876465, - "learning_rate": 3.341554836542372e-05, - "loss": 0.6129, - "step": 112560 - }, - { - "epoch": 0.9951555013348893, - "grad_norm": 2.9814226627349854, - "learning_rate": 3.341407497775185e-05, - "loss": 0.7265, - "step": 112570 - }, - { - "epoch": 0.9952439045952015, - "grad_norm": 8.501177787780762, - "learning_rate": 3.3412601590079975e-05, - "loss": 0.6709, - "step": 112580 - }, - { - "epoch": 0.9953323078555137, - "grad_norm": 2.1733036041259766, - "learning_rate": 3.3411128202408104e-05, - "loss": 0.78, - "step": 112590 - }, - { - "epoch": 0.995420711115826, - "grad_norm": 1.7375086545944214, - "learning_rate": 3.340965481473624e-05, - "loss": 0.7385, - "step": 112600 - }, - { - "epoch": 0.9955091143761382, - "grad_norm": 2.2866108417510986, - "learning_rate": 3.340818142706437e-05, - "loss": 0.7098, - "step": 112610 - }, - { - "epoch": 0.9955975176364504, - "grad_norm": 5.398899078369141, - "learning_rate": 3.3406708039392495e-05, - "loss": 0.6375, - "step": 112620 - }, - { - "epoch": 0.9956859208967627, - "grad_norm": 1.354555368423462, - "learning_rate": 3.3405234651720624e-05, - "loss": 0.6631, - "step": 112630 - }, - { - "epoch": 0.9957743241570749, - "grad_norm": 2.862786293029785, - "learning_rate": 3.340376126404875e-05, - "loss": 0.6972, - "step": 112640 - }, - { - "epoch": 0.9958627274173871, - "grad_norm": 2.064380645751953, - "learning_rate": 3.340228787637688e-05, - "loss": 0.6434, - "step": 112650 - }, - { - "epoch": 0.9959511306776994, - "grad_norm": 3.987992286682129, - "learning_rate": 3.3400814488705016e-05, - "loss": 0.6607, - "step": 112660 - }, - { - "epoch": 0.9960395339380116, - "grad_norm": 1.7794808149337769, - "learning_rate": 3.3399341101033144e-05, - "loss": 0.6053, - "step": 112670 - }, - { - "epoch": 0.9961279371983238, - "grad_norm": 2.814638376235962, - "learning_rate": 3.339786771336127e-05, - "loss": 0.608, - "step": 112680 - }, - { - "epoch": 0.9962163404586362, - "grad_norm": 2.2738704681396484, - "learning_rate": 3.33963943256894e-05, - "loss": 0.6412, - "step": 112690 - }, - { - "epoch": 0.9963047437189484, - "grad_norm": 3.6364216804504395, - "learning_rate": 3.339492093801753e-05, - "loss": 0.6692, - "step": 112700 - }, - { - "epoch": 0.9963931469792606, - "grad_norm": 1.6770386695861816, - "learning_rate": 3.339344755034566e-05, - "loss": 0.6817, - "step": 112710 - }, - { - "epoch": 0.9964815502395729, - "grad_norm": 5.360958576202393, - "learning_rate": 3.339197416267379e-05, - "loss": 0.6297, - "step": 112720 - }, - { - "epoch": 0.9965699534998851, - "grad_norm": 2.039898157119751, - "learning_rate": 3.3390500775001914e-05, - "loss": 0.6651, - "step": 112730 - }, - { - "epoch": 0.9966583567601973, - "grad_norm": 4.612518310546875, - "learning_rate": 3.338902738733005e-05, - "loss": 0.6607, - "step": 112740 - }, - { - "epoch": 0.9967467600205095, - "grad_norm": 7.095003604888916, - "learning_rate": 3.338755399965818e-05, - "loss": 0.6472, - "step": 112750 - }, - { - "epoch": 0.9968351632808218, - "grad_norm": 2.640900135040283, - "learning_rate": 3.3386080611986306e-05, - "loss": 0.572, - "step": 112760 - }, - { - "epoch": 0.996923566541134, - "grad_norm": 11.376489639282227, - "learning_rate": 3.3384607224314434e-05, - "loss": 0.7194, - "step": 112770 - }, - { - "epoch": 0.9970119698014462, - "grad_norm": 1.747674822807312, - "learning_rate": 3.338313383664257e-05, - "loss": 0.739, - "step": 112780 - }, - { - "epoch": 0.9971003730617585, - "grad_norm": 9.786775588989258, - "learning_rate": 3.338166044897069e-05, - "loss": 0.7105, - "step": 112790 - }, - { - "epoch": 0.9971887763220708, - "grad_norm": 3.084177255630493, - "learning_rate": 3.3380187061298826e-05, - "loss": 0.6088, - "step": 112800 - }, - { - "epoch": 0.997277179582383, - "grad_norm": 4.496860980987549, - "learning_rate": 3.337871367362695e-05, - "loss": 0.5767, - "step": 112810 - }, - { - "epoch": 0.9973655828426953, - "grad_norm": 0.9731305837631226, - "learning_rate": 3.337724028595508e-05, - "loss": 0.5315, - "step": 112820 - }, - { - "epoch": 0.9974539861030075, - "grad_norm": 17.036603927612305, - "learning_rate": 3.337576689828321e-05, - "loss": 0.6176, - "step": 112830 - }, - { - "epoch": 0.9975423893633197, - "grad_norm": 1.8506420850753784, - "learning_rate": 3.337429351061134e-05, - "loss": 0.675, - "step": 112840 - }, - { - "epoch": 0.997630792623632, - "grad_norm": 1.9069759845733643, - "learning_rate": 3.337282012293947e-05, - "loss": 0.6838, - "step": 112850 - }, - { - "epoch": 0.9977191958839442, - "grad_norm": 2.550217628479004, - "learning_rate": 3.33713467352676e-05, - "loss": 0.6823, - "step": 112860 - }, - { - "epoch": 0.9978075991442564, - "grad_norm": 2.61788010597229, - "learning_rate": 3.3369873347595725e-05, - "loss": 0.7089, - "step": 112870 - }, - { - "epoch": 0.9978960024045687, - "grad_norm": 1.6682244539260864, - "learning_rate": 3.336839995992386e-05, - "loss": 0.7126, - "step": 112880 - }, - { - "epoch": 0.9979844056648809, - "grad_norm": 2.2637345790863037, - "learning_rate": 3.336692657225199e-05, - "loss": 0.7083, - "step": 112890 - }, - { - "epoch": 0.9980728089251931, - "grad_norm": 3.195830821990967, - "learning_rate": 3.3365453184580116e-05, - "loss": 0.649, - "step": 112900 - }, - { - "epoch": 0.9981612121855054, - "grad_norm": 2.306546688079834, - "learning_rate": 3.3363979796908245e-05, - "loss": 0.7072, - "step": 112910 - }, - { - "epoch": 0.9982496154458177, - "grad_norm": 3.3835599422454834, - "learning_rate": 3.336250640923637e-05, - "loss": 0.7273, - "step": 112920 - }, - { - "epoch": 0.9983380187061299, - "grad_norm": 2.138302803039551, - "learning_rate": 3.33610330215645e-05, - "loss": 0.7381, - "step": 112930 - }, - { - "epoch": 0.9984264219664422, - "grad_norm": 1.5605179071426392, - "learning_rate": 3.335955963389264e-05, - "loss": 0.5386, - "step": 112940 - }, - { - "epoch": 0.9985148252267544, - "grad_norm": 2.307772159576416, - "learning_rate": 3.335808624622076e-05, - "loss": 0.6734, - "step": 112950 - }, - { - "epoch": 0.9986032284870666, - "grad_norm": 1.731562614440918, - "learning_rate": 3.335661285854889e-05, - "loss": 0.5946, - "step": 112960 - }, - { - "epoch": 0.9986916317473788, - "grad_norm": 1.4824844598770142, - "learning_rate": 3.335513947087702e-05, - "loss": 0.6387, - "step": 112970 - }, - { - "epoch": 0.9987800350076911, - "grad_norm": 11.846600532531738, - "learning_rate": 3.335366608320515e-05, - "loss": 0.6748, - "step": 112980 - }, - { - "epoch": 0.9988684382680033, - "grad_norm": 1.443597435951233, - "learning_rate": 3.335219269553328e-05, - "loss": 0.695, - "step": 112990 - }, - { - "epoch": 0.9989568415283155, - "grad_norm": 2.229417085647583, - "learning_rate": 3.3350719307861414e-05, - "loss": 0.6362, - "step": 113000 - }, - { - "epoch": 0.9990452447886278, - "grad_norm": 2.9889605045318604, - "learning_rate": 3.3349245920189535e-05, - "loss": 0.7178, - "step": 113010 - }, - { - "epoch": 0.99913364804894, - "grad_norm": 3.6316070556640625, - "learning_rate": 3.334777253251767e-05, - "loss": 0.717, - "step": 113020 - }, - { - "epoch": 0.9992220513092522, - "grad_norm": 3.6598217487335205, - "learning_rate": 3.33462991448458e-05, - "loss": 0.7047, - "step": 113030 - }, - { - "epoch": 0.9993104545695646, - "grad_norm": 1.576230525970459, - "learning_rate": 3.334482575717393e-05, - "loss": 0.525, - "step": 113040 - }, - { - "epoch": 0.9993988578298768, - "grad_norm": 1.6156716346740723, - "learning_rate": 3.3343352369502055e-05, - "loss": 0.6069, - "step": 113050 - }, - { - "epoch": 0.999487261090189, - "grad_norm": 1.4244310855865479, - "learning_rate": 3.3341878981830184e-05, - "loss": 0.7527, - "step": 113060 - }, - { - "epoch": 0.9995756643505013, - "grad_norm": 3.3648128509521484, - "learning_rate": 3.334040559415831e-05, - "loss": 0.8143, - "step": 113070 - }, - { - "epoch": 0.9996640676108135, - "grad_norm": 2.311471700668335, - "learning_rate": 3.333893220648645e-05, - "loss": 0.5865, - "step": 113080 - }, - { - "epoch": 0.9997524708711257, - "grad_norm": 4.397999286651611, - "learning_rate": 3.333745881881457e-05, - "loss": 0.6696, - "step": 113090 - }, - { - "epoch": 0.999840874131438, - "grad_norm": 6.6669020652771, - "learning_rate": 3.3335985431142704e-05, - "loss": 0.5819, - "step": 113100 - }, - { - "epoch": 0.9999292773917502, - "grad_norm": 2.493673086166382, - "learning_rate": 3.333451204347083e-05, - "loss": 0.6061, - "step": 113110 - }, - { - "epoch": 1.0, - "eval_loss": 0.6402608752250671, - "eval_runtime": 1558.2793, - "eval_samples_per_second": 290.365, - "eval_steps_per_second": 18.148, - "step": 113118 - }, - { - "epoch": 1.0000176806520624, - "grad_norm": 5.0341668128967285, - "learning_rate": 3.333303865579896e-05, - "loss": 0.5431, - "step": 113120 - }, - { - "epoch": 1.0001060839123748, - "grad_norm": 1.3547182083129883, - "learning_rate": 3.333156526812709e-05, - "loss": 0.5498, - "step": 113130 - }, - { - "epoch": 1.0001944871726869, - "grad_norm": 1.4502989053726196, - "learning_rate": 3.3330091880455224e-05, - "loss": 0.6172, - "step": 113140 - }, - { - "epoch": 1.0002828904329992, - "grad_norm": 5.147324562072754, - "learning_rate": 3.3328618492783346e-05, - "loss": 0.5801, - "step": 113150 - }, - { - "epoch": 1.0003712936933113, - "grad_norm": 0.6961853504180908, - "learning_rate": 3.332714510511148e-05, - "loss": 0.5647, - "step": 113160 - }, - { - "epoch": 1.0004596969536237, - "grad_norm": 2.148359537124634, - "learning_rate": 3.33256717174396e-05, - "loss": 0.709, - "step": 113170 - }, - { - "epoch": 1.0005481002139358, - "grad_norm": 5.336066722869873, - "learning_rate": 3.332419832976774e-05, - "loss": 0.5048, - "step": 113180 - }, - { - "epoch": 1.0006365034742482, - "grad_norm": 1.5481164455413818, - "learning_rate": 3.3322724942095866e-05, - "loss": 0.6045, - "step": 113190 - }, - { - "epoch": 1.0007249067345603, - "grad_norm": 3.8603017330169678, - "learning_rate": 3.3321251554423994e-05, - "loss": 0.6148, - "step": 113200 - }, - { - "epoch": 1.0008133099948726, - "grad_norm": 2.894463300704956, - "learning_rate": 3.331977816675212e-05, - "loss": 0.5941, - "step": 113210 - }, - { - "epoch": 1.000901713255185, - "grad_norm": 7.036715984344482, - "learning_rate": 3.331830477908026e-05, - "loss": 0.5017, - "step": 113220 - }, - { - "epoch": 1.000990116515497, - "grad_norm": 5.875628471374512, - "learning_rate": 3.331683139140838e-05, - "loss": 0.5387, - "step": 113230 - }, - { - "epoch": 1.0010785197758094, - "grad_norm": 5.956996917724609, - "learning_rate": 3.3315358003736514e-05, - "loss": 0.6005, - "step": 113240 - }, - { - "epoch": 1.0011669230361215, - "grad_norm": 11.41282844543457, - "learning_rate": 3.331388461606464e-05, - "loss": 0.5063, - "step": 113250 - }, - { - "epoch": 1.0012553262964339, - "grad_norm": 2.202025890350342, - "learning_rate": 3.331241122839277e-05, - "loss": 0.5133, - "step": 113260 - }, - { - "epoch": 1.001343729556746, - "grad_norm": 2.3404276371002197, - "learning_rate": 3.33109378407209e-05, - "loss": 0.499, - "step": 113270 - }, - { - "epoch": 1.0014321328170583, - "grad_norm": 1.808912754058838, - "learning_rate": 3.330946445304903e-05, - "loss": 0.71, - "step": 113280 - }, - { - "epoch": 1.0015205360773705, - "grad_norm": 3.837283134460449, - "learning_rate": 3.3307991065377156e-05, - "loss": 0.7154, - "step": 113290 - }, - { - "epoch": 1.0016089393376828, - "grad_norm": 1.2322243452072144, - "learning_rate": 3.330651767770529e-05, - "loss": 0.793, - "step": 113300 - }, - { - "epoch": 1.001697342597995, - "grad_norm": 7.320115089416504, - "learning_rate": 3.330504429003341e-05, - "loss": 0.6948, - "step": 113310 - }, - { - "epoch": 1.0017857458583073, - "grad_norm": 4.228982925415039, - "learning_rate": 3.330357090236155e-05, - "loss": 0.6774, - "step": 113320 - }, - { - "epoch": 1.0018741491186196, - "grad_norm": 10.58315658569336, - "learning_rate": 3.3302097514689676e-05, - "loss": 0.5723, - "step": 113330 - }, - { - "epoch": 1.0019625523789317, - "grad_norm": 0.9104031324386597, - "learning_rate": 3.3300624127017805e-05, - "loss": 0.5914, - "step": 113340 - }, - { - "epoch": 1.002050955639244, - "grad_norm": 4.153336524963379, - "learning_rate": 3.329915073934593e-05, - "loss": 0.6114, - "step": 113350 - }, - { - "epoch": 1.0021393588995562, - "grad_norm": 1.6540496349334717, - "learning_rate": 3.329767735167407e-05, - "loss": 0.6583, - "step": 113360 - }, - { - "epoch": 1.0022277621598685, - "grad_norm": 1.6964622735977173, - "learning_rate": 3.329620396400219e-05, - "loss": 0.6022, - "step": 113370 - }, - { - "epoch": 1.0023161654201806, - "grad_norm": 1.8616136312484741, - "learning_rate": 3.3294730576330325e-05, - "loss": 0.6551, - "step": 113380 - }, - { - "epoch": 1.002404568680493, - "grad_norm": 3.145517587661743, - "learning_rate": 3.329325718865845e-05, - "loss": 0.5643, - "step": 113390 - }, - { - "epoch": 1.002492971940805, - "grad_norm": 3.3268396854400635, - "learning_rate": 3.329178380098658e-05, - "loss": 0.4888, - "step": 113400 - }, - { - "epoch": 1.0025813752011175, - "grad_norm": 1.4747227430343628, - "learning_rate": 3.329031041331471e-05, - "loss": 0.465, - "step": 113410 - }, - { - "epoch": 1.0026697784614296, - "grad_norm": 2.9858028888702393, - "learning_rate": 3.328883702564284e-05, - "loss": 0.5592, - "step": 113420 - }, - { - "epoch": 1.002758181721742, - "grad_norm": 5.396865367889404, - "learning_rate": 3.328736363797097e-05, - "loss": 0.5083, - "step": 113430 - }, - { - "epoch": 1.002846584982054, - "grad_norm": 2.077924966812134, - "learning_rate": 3.32858902502991e-05, - "loss": 0.5788, - "step": 113440 - }, - { - "epoch": 1.0029349882423664, - "grad_norm": 2.6789615154266357, - "learning_rate": 3.328441686262723e-05, - "loss": 0.7885, - "step": 113450 - }, - { - "epoch": 1.0030233915026787, - "grad_norm": 8.627890586853027, - "learning_rate": 3.328294347495536e-05, - "loss": 0.6237, - "step": 113460 - }, - { - "epoch": 1.0031117947629908, - "grad_norm": 1.346265435218811, - "learning_rate": 3.328147008728349e-05, - "loss": 0.5363, - "step": 113470 - }, - { - "epoch": 1.0032001980233032, - "grad_norm": 1.7499152421951294, - "learning_rate": 3.3279996699611615e-05, - "loss": 0.4775, - "step": 113480 - }, - { - "epoch": 1.0032886012836153, - "grad_norm": 2.949223518371582, - "learning_rate": 3.3278523311939744e-05, - "loss": 0.7244, - "step": 113490 - }, - { - "epoch": 1.0033770045439276, - "grad_norm": 1.6534029245376587, - "learning_rate": 3.327704992426788e-05, - "loss": 0.6141, - "step": 113500 - }, - { - "epoch": 1.0034654078042398, - "grad_norm": 4.85213565826416, - "learning_rate": 3.327557653659601e-05, - "loss": 0.5037, - "step": 113510 - }, - { - "epoch": 1.003553811064552, - "grad_norm": 2.4136953353881836, - "learning_rate": 3.3274103148924135e-05, - "loss": 0.4695, - "step": 113520 - }, - { - "epoch": 1.0036422143248642, - "grad_norm": 2.642446279525757, - "learning_rate": 3.3272629761252264e-05, - "loss": 0.6884, - "step": 113530 - }, - { - "epoch": 1.0037306175851766, - "grad_norm": 1.989149570465088, - "learning_rate": 3.327115637358039e-05, - "loss": 0.6696, - "step": 113540 - }, - { - "epoch": 1.0038190208454887, - "grad_norm": 1.039262294769287, - "learning_rate": 3.326968298590852e-05, - "loss": 0.538, - "step": 113550 - }, - { - "epoch": 1.003907424105801, - "grad_norm": 2.7582459449768066, - "learning_rate": 3.326820959823665e-05, - "loss": 0.5875, - "step": 113560 - }, - { - "epoch": 1.0039958273661134, - "grad_norm": 1.412739872932434, - "learning_rate": 3.3266736210564784e-05, - "loss": 0.7538, - "step": 113570 - }, - { - "epoch": 1.0040842306264255, - "grad_norm": 1.9850144386291504, - "learning_rate": 3.326526282289291e-05, - "loss": 0.6752, - "step": 113580 - }, - { - "epoch": 1.0041726338867378, - "grad_norm": 10.695941925048828, - "learning_rate": 3.326378943522104e-05, - "loss": 0.5454, - "step": 113590 - }, - { - "epoch": 1.00426103714705, - "grad_norm": 5.131735801696777, - "learning_rate": 3.326231604754917e-05, - "loss": 0.8016, - "step": 113600 - }, - { - "epoch": 1.0043494404073623, - "grad_norm": 1.8428049087524414, - "learning_rate": 3.32608426598773e-05, - "loss": 0.6201, - "step": 113610 - }, - { - "epoch": 1.0044378436676744, - "grad_norm": 1.785803198814392, - "learning_rate": 3.3259369272205426e-05, - "loss": 0.5449, - "step": 113620 - }, - { - "epoch": 1.0045262469279868, - "grad_norm": 3.745408058166504, - "learning_rate": 3.325789588453356e-05, - "loss": 0.6351, - "step": 113630 - }, - { - "epoch": 1.0046146501882989, - "grad_norm": 1.622895359992981, - "learning_rate": 3.325642249686168e-05, - "loss": 0.5655, - "step": 113640 - }, - { - "epoch": 1.0047030534486112, - "grad_norm": 1.594959020614624, - "learning_rate": 3.325494910918982e-05, - "loss": 0.696, - "step": 113650 - }, - { - "epoch": 1.0047914567089233, - "grad_norm": 1.0371103286743164, - "learning_rate": 3.3253475721517946e-05, - "loss": 0.4814, - "step": 113660 - }, - { - "epoch": 1.0048798599692357, - "grad_norm": 1.7003803253173828, - "learning_rate": 3.3252002333846074e-05, - "loss": 0.5683, - "step": 113670 - }, - { - "epoch": 1.0049682632295478, - "grad_norm": 3.5081698894500732, - "learning_rate": 3.32505289461742e-05, - "loss": 0.5515, - "step": 113680 - }, - { - "epoch": 1.0050566664898601, - "grad_norm": 2.4893651008605957, - "learning_rate": 3.324905555850234e-05, - "loss": 0.6085, - "step": 113690 - }, - { - "epoch": 1.0051450697501725, - "grad_norm": 2.544677734375, - "learning_rate": 3.324758217083046e-05, - "loss": 0.501, - "step": 113700 - }, - { - "epoch": 1.0052334730104846, - "grad_norm": 8.061636924743652, - "learning_rate": 3.3246108783158594e-05, - "loss": 0.5854, - "step": 113710 - }, - { - "epoch": 1.005321876270797, - "grad_norm": 16.36623191833496, - "learning_rate": 3.324463539548672e-05, - "loss": 0.6096, - "step": 113720 - }, - { - "epoch": 1.005410279531109, - "grad_norm": 10.859704971313477, - "learning_rate": 3.324316200781485e-05, - "loss": 0.6627, - "step": 113730 - }, - { - "epoch": 1.0054986827914214, - "grad_norm": 10.567687034606934, - "learning_rate": 3.324168862014298e-05, - "loss": 0.6371, - "step": 113740 - }, - { - "epoch": 1.0055870860517335, - "grad_norm": 7.632933139801025, - "learning_rate": 3.324021523247111e-05, - "loss": 0.7607, - "step": 113750 - }, - { - "epoch": 1.0056754893120459, - "grad_norm": 2.8745486736297607, - "learning_rate": 3.3238741844799236e-05, - "loss": 0.7439, - "step": 113760 - }, - { - "epoch": 1.005763892572358, - "grad_norm": 1.6206594705581665, - "learning_rate": 3.323726845712737e-05, - "loss": 0.4246, - "step": 113770 - }, - { - "epoch": 1.0058522958326703, - "grad_norm": 2.075242757797241, - "learning_rate": 3.323579506945549e-05, - "loss": 0.6937, - "step": 113780 - }, - { - "epoch": 1.0059406990929824, - "grad_norm": 1.1037721633911133, - "learning_rate": 3.323432168178363e-05, - "loss": 0.5536, - "step": 113790 - }, - { - "epoch": 1.0060291023532948, - "grad_norm": 1.3882333040237427, - "learning_rate": 3.3232848294111756e-05, - "loss": 0.5429, - "step": 113800 - }, - { - "epoch": 1.0061175056136071, - "grad_norm": 2.6770808696746826, - "learning_rate": 3.3231374906439885e-05, - "loss": 0.6684, - "step": 113810 - }, - { - "epoch": 1.0062059088739193, - "grad_norm": 6.101493835449219, - "learning_rate": 3.322990151876801e-05, - "loss": 0.6159, - "step": 113820 - }, - { - "epoch": 1.0062943121342316, - "grad_norm": 6.200798988342285, - "learning_rate": 3.322842813109615e-05, - "loss": 0.586, - "step": 113830 - }, - { - "epoch": 1.0063827153945437, - "grad_norm": 4.1431732177734375, - "learning_rate": 3.322695474342427e-05, - "loss": 0.6897, - "step": 113840 - }, - { - "epoch": 1.006471118654856, - "grad_norm": 3.066347122192383, - "learning_rate": 3.3225481355752405e-05, - "loss": 0.5235, - "step": 113850 - }, - { - "epoch": 1.0065595219151682, - "grad_norm": 14.762816429138184, - "learning_rate": 3.322400796808053e-05, - "loss": 0.6861, - "step": 113860 - }, - { - "epoch": 1.0066479251754805, - "grad_norm": 3.053056478500366, - "learning_rate": 3.322253458040866e-05, - "loss": 0.5974, - "step": 113870 - }, - { - "epoch": 1.0067363284357926, - "grad_norm": 3.4242119789123535, - "learning_rate": 3.322106119273679e-05, - "loss": 0.6279, - "step": 113880 - }, - { - "epoch": 1.006824731696105, - "grad_norm": 2.1032440662384033, - "learning_rate": 3.321958780506492e-05, - "loss": 0.5928, - "step": 113890 - }, - { - "epoch": 1.006913134956417, - "grad_norm": 0.756652295589447, - "learning_rate": 3.321811441739305e-05, - "loss": 0.553, - "step": 113900 - }, - { - "epoch": 1.0070015382167294, - "grad_norm": 7.051733493804932, - "learning_rate": 3.321664102972118e-05, - "loss": 0.5903, - "step": 113910 - }, - { - "epoch": 1.0070899414770418, - "grad_norm": 1.7435734272003174, - "learning_rate": 3.3215167642049303e-05, - "loss": 0.5776, - "step": 113920 - }, - { - "epoch": 1.007178344737354, - "grad_norm": 1.471184492111206, - "learning_rate": 3.321369425437744e-05, - "loss": 0.6572, - "step": 113930 - }, - { - "epoch": 1.0072667479976662, - "grad_norm": 1.5176074504852295, - "learning_rate": 3.321222086670557e-05, - "loss": 0.6424, - "step": 113940 - }, - { - "epoch": 1.0073551512579784, - "grad_norm": 5.296817779541016, - "learning_rate": 3.3210747479033695e-05, - "loss": 0.7113, - "step": 113950 - }, - { - "epoch": 1.0074435545182907, - "grad_norm": 6.168008804321289, - "learning_rate": 3.3209274091361824e-05, - "loss": 0.6011, - "step": 113960 - }, - { - "epoch": 1.0075319577786028, - "grad_norm": 2.4365758895874023, - "learning_rate": 3.320780070368996e-05, - "loss": 0.6166, - "step": 113970 - }, - { - "epoch": 1.0076203610389152, - "grad_norm": 1.9212533235549927, - "learning_rate": 3.320632731601808e-05, - "loss": 0.7925, - "step": 113980 - }, - { - "epoch": 1.0077087642992273, - "grad_norm": 1.874809980392456, - "learning_rate": 3.3204853928346215e-05, - "loss": 0.6869, - "step": 113990 - }, - { - "epoch": 1.0077971675595396, - "grad_norm": 6.095668315887451, - "learning_rate": 3.320338054067434e-05, - "loss": 0.6558, - "step": 114000 - }, - { - "epoch": 1.0078855708198518, - "grad_norm": 1.9459079504013062, - "learning_rate": 3.320190715300247e-05, - "loss": 0.5956, - "step": 114010 - }, - { - "epoch": 1.007973974080164, - "grad_norm": 3.421865940093994, - "learning_rate": 3.32004337653306e-05, - "loss": 0.5842, - "step": 114020 - }, - { - "epoch": 1.0080623773404762, - "grad_norm": 4.6370038986206055, - "learning_rate": 3.319896037765873e-05, - "loss": 0.6503, - "step": 114030 - }, - { - "epoch": 1.0081507806007886, - "grad_norm": 2.3934757709503174, - "learning_rate": 3.319748698998686e-05, - "loss": 0.721, - "step": 114040 - }, - { - "epoch": 1.008239183861101, - "grad_norm": 2.066995859146118, - "learning_rate": 3.319601360231499e-05, - "loss": 0.659, - "step": 114050 - }, - { - "epoch": 1.008327587121413, - "grad_norm": 0.802885115146637, - "learning_rate": 3.3194540214643114e-05, - "loss": 0.6241, - "step": 114060 - }, - { - "epoch": 1.0084159903817254, - "grad_norm": 2.192640781402588, - "learning_rate": 3.319306682697125e-05, - "loss": 0.6469, - "step": 114070 - }, - { - "epoch": 1.0085043936420375, - "grad_norm": 2.280278205871582, - "learning_rate": 3.319159343929938e-05, - "loss": 0.6334, - "step": 114080 - }, - { - "epoch": 1.0085927969023498, - "grad_norm": 9.668909072875977, - "learning_rate": 3.3190120051627506e-05, - "loss": 0.6301, - "step": 114090 - }, - { - "epoch": 1.008681200162662, - "grad_norm": 2.2471654415130615, - "learning_rate": 3.3188646663955634e-05, - "loss": 0.6271, - "step": 114100 - }, - { - "epoch": 1.0087696034229743, - "grad_norm": 4.89241886138916, - "learning_rate": 3.318717327628376e-05, - "loss": 0.6508, - "step": 114110 - }, - { - "epoch": 1.0088580066832864, - "grad_norm": 1.3187072277069092, - "learning_rate": 3.318569988861189e-05, - "loss": 0.5368, - "step": 114120 - }, - { - "epoch": 1.0089464099435987, - "grad_norm": 1.534955382347107, - "learning_rate": 3.3184226500940026e-05, - "loss": 0.5425, - "step": 114130 - }, - { - "epoch": 1.0090348132039109, - "grad_norm": 2.378653049468994, - "learning_rate": 3.318275311326815e-05, - "loss": 0.6615, - "step": 114140 - }, - { - "epoch": 1.0091232164642232, - "grad_norm": 3.9965426921844482, - "learning_rate": 3.318127972559628e-05, - "loss": 0.7364, - "step": 114150 - }, - { - "epoch": 1.0092116197245355, - "grad_norm": 2.593505382537842, - "learning_rate": 3.317980633792441e-05, - "loss": 0.618, - "step": 114160 - }, - { - "epoch": 1.0093000229848477, - "grad_norm": 17.067840576171875, - "learning_rate": 3.317833295025254e-05, - "loss": 0.599, - "step": 114170 - }, - { - "epoch": 1.00938842624516, - "grad_norm": 2.4784748554229736, - "learning_rate": 3.317685956258067e-05, - "loss": 0.6696, - "step": 114180 - }, - { - "epoch": 1.0094768295054721, - "grad_norm": 2.6997790336608887, - "learning_rate": 3.31753861749088e-05, - "loss": 0.7048, - "step": 114190 - }, - { - "epoch": 1.0095652327657845, - "grad_norm": 1.444516897201538, - "learning_rate": 3.3173912787236924e-05, - "loss": 0.5986, - "step": 114200 - }, - { - "epoch": 1.0096536360260966, - "grad_norm": 3.4414377212524414, - "learning_rate": 3.317243939956506e-05, - "loss": 0.5343, - "step": 114210 - }, - { - "epoch": 1.009742039286409, - "grad_norm": 1.959061861038208, - "learning_rate": 3.317096601189318e-05, - "loss": 0.6388, - "step": 114220 - }, - { - "epoch": 1.009830442546721, - "grad_norm": 4.316252708435059, - "learning_rate": 3.3169492624221316e-05, - "loss": 0.653, - "step": 114230 - }, - { - "epoch": 1.0099188458070334, - "grad_norm": 1.8691909313201904, - "learning_rate": 3.3168019236549445e-05, - "loss": 0.6185, - "step": 114240 - }, - { - "epoch": 1.0100072490673455, - "grad_norm": 1.523187279701233, - "learning_rate": 3.316654584887757e-05, - "loss": 0.5836, - "step": 114250 - }, - { - "epoch": 1.0100956523276579, - "grad_norm": 2.1660549640655518, - "learning_rate": 3.31650724612057e-05, - "loss": 0.6075, - "step": 114260 - }, - { - "epoch": 1.01018405558797, - "grad_norm": 1.1963714361190796, - "learning_rate": 3.3163599073533837e-05, - "loss": 0.6646, - "step": 114270 - }, - { - "epoch": 1.0102724588482823, - "grad_norm": 1.5786654949188232, - "learning_rate": 3.316212568586196e-05, - "loss": 0.6503, - "step": 114280 - }, - { - "epoch": 1.0103608621085947, - "grad_norm": 2.0850670337677, - "learning_rate": 3.316065229819009e-05, - "loss": 0.6873, - "step": 114290 - }, - { - "epoch": 1.0104492653689068, - "grad_norm": 1.2632302045822144, - "learning_rate": 3.315917891051822e-05, - "loss": 0.5467, - "step": 114300 - }, - { - "epoch": 1.0105376686292191, - "grad_norm": 3.6498982906341553, - "learning_rate": 3.315770552284635e-05, - "loss": 0.5677, - "step": 114310 - }, - { - "epoch": 1.0106260718895312, - "grad_norm": 1.6779340505599976, - "learning_rate": 3.315623213517448e-05, - "loss": 0.5289, - "step": 114320 - }, - { - "epoch": 1.0107144751498436, - "grad_norm": 9.336527824401855, - "learning_rate": 3.3154758747502613e-05, - "loss": 0.6628, - "step": 114330 - }, - { - "epoch": 1.0108028784101557, - "grad_norm": 2.1366055011749268, - "learning_rate": 3.3153285359830735e-05, - "loss": 0.7948, - "step": 114340 - }, - { - "epoch": 1.010891281670468, - "grad_norm": 1.3133643865585327, - "learning_rate": 3.315181197215887e-05, - "loss": 0.5915, - "step": 114350 - }, - { - "epoch": 1.0109796849307802, - "grad_norm": 2.3709449768066406, - "learning_rate": 3.3150338584487e-05, - "loss": 0.7534, - "step": 114360 - }, - { - "epoch": 1.0110680881910925, - "grad_norm": 2.742271661758423, - "learning_rate": 3.314886519681513e-05, - "loss": 0.6598, - "step": 114370 - }, - { - "epoch": 1.0111564914514046, - "grad_norm": 1.0594581365585327, - "learning_rate": 3.3147391809143255e-05, - "loss": 0.6599, - "step": 114380 - }, - { - "epoch": 1.011244894711717, - "grad_norm": 2.854827880859375, - "learning_rate": 3.3145918421471384e-05, - "loss": 0.5625, - "step": 114390 - }, - { - "epoch": 1.0113332979720293, - "grad_norm": 1.4914394617080688, - "learning_rate": 3.314444503379951e-05, - "loss": 0.5157, - "step": 114400 - }, - { - "epoch": 1.0114217012323414, - "grad_norm": 2.1870899200439453, - "learning_rate": 3.314297164612765e-05, - "loss": 0.5608, - "step": 114410 - }, - { - "epoch": 1.0115101044926538, - "grad_norm": 3.653965950012207, - "learning_rate": 3.3141498258455775e-05, - "loss": 0.7114, - "step": 114420 - }, - { - "epoch": 1.011598507752966, - "grad_norm": 4.081316947937012, - "learning_rate": 3.3140024870783904e-05, - "loss": 0.6132, - "step": 114430 - }, - { - "epoch": 1.0116869110132782, - "grad_norm": 0.9579772353172302, - "learning_rate": 3.313855148311203e-05, - "loss": 0.6651, - "step": 114440 - }, - { - "epoch": 1.0117753142735904, - "grad_norm": 7.856067180633545, - "learning_rate": 3.313707809544016e-05, - "loss": 0.5882, - "step": 114450 - }, - { - "epoch": 1.0118637175339027, - "grad_norm": 2.0313918590545654, - "learning_rate": 3.313560470776829e-05, - "loss": 0.7006, - "step": 114460 - }, - { - "epoch": 1.0119521207942148, - "grad_norm": 1.9130921363830566, - "learning_rate": 3.313413132009642e-05, - "loss": 0.7166, - "step": 114470 - }, - { - "epoch": 1.0120405240545272, - "grad_norm": 0.9928189516067505, - "learning_rate": 3.313265793242455e-05, - "loss": 0.6781, - "step": 114480 - }, - { - "epoch": 1.0121289273148393, - "grad_norm": 0.9419524669647217, - "learning_rate": 3.313118454475268e-05, - "loss": 0.5059, - "step": 114490 - }, - { - "epoch": 1.0122173305751516, - "grad_norm": 1.8238518238067627, - "learning_rate": 3.312971115708081e-05, - "loss": 0.6664, - "step": 114500 - }, - { - "epoch": 1.012305733835464, - "grad_norm": 3.2623229026794434, - "learning_rate": 3.312823776940894e-05, - "loss": 0.7242, - "step": 114510 - }, - { - "epoch": 1.012394137095776, - "grad_norm": 3.2176554203033447, - "learning_rate": 3.3126764381737066e-05, - "loss": 0.6324, - "step": 114520 - }, - { - "epoch": 1.0124825403560884, - "grad_norm": 5.634082317352295, - "learning_rate": 3.3125290994065194e-05, - "loss": 0.7172, - "step": 114530 - }, - { - "epoch": 1.0125709436164005, - "grad_norm": 1.57271146774292, - "learning_rate": 3.312381760639333e-05, - "loss": 0.5841, - "step": 114540 - }, - { - "epoch": 1.0126593468767129, - "grad_norm": 5.282871723175049, - "learning_rate": 3.312234421872146e-05, - "loss": 0.5814, - "step": 114550 - }, - { - "epoch": 1.012747750137025, - "grad_norm": 3.7397422790527344, - "learning_rate": 3.3120870831049586e-05, - "loss": 0.6079, - "step": 114560 - }, - { - "epoch": 1.0128361533973373, - "grad_norm": 4.592635154724121, - "learning_rate": 3.3119397443377714e-05, - "loss": 0.715, - "step": 114570 - }, - { - "epoch": 1.0129245566576495, - "grad_norm": 3.379467725753784, - "learning_rate": 3.311792405570584e-05, - "loss": 0.5434, - "step": 114580 - }, - { - "epoch": 1.0130129599179618, - "grad_norm": 2.7413527965545654, - "learning_rate": 3.311645066803397e-05, - "loss": 0.6117, - "step": 114590 - }, - { - "epoch": 1.013101363178274, - "grad_norm": 2.3742785453796387, - "learning_rate": 3.3114977280362106e-05, - "loss": 0.576, - "step": 114600 - }, - { - "epoch": 1.0131897664385863, - "grad_norm": 0.8790023326873779, - "learning_rate": 3.311350389269023e-05, - "loss": 0.5333, - "step": 114610 - }, - { - "epoch": 1.0132781696988984, - "grad_norm": 7.006760120391846, - "learning_rate": 3.311203050501836e-05, - "loss": 0.5783, - "step": 114620 - }, - { - "epoch": 1.0133665729592107, - "grad_norm": 6.131092548370361, - "learning_rate": 3.311055711734649e-05, - "loss": 0.6316, - "step": 114630 - }, - { - "epoch": 1.013454976219523, - "grad_norm": 1.641683578491211, - "learning_rate": 3.310908372967462e-05, - "loss": 0.627, - "step": 114640 - }, - { - "epoch": 1.0135433794798352, - "grad_norm": 2.0098884105682373, - "learning_rate": 3.310761034200275e-05, - "loss": 0.6766, - "step": 114650 - }, - { - "epoch": 1.0136317827401475, - "grad_norm": 2.0325984954833984, - "learning_rate": 3.310613695433088e-05, - "loss": 0.7113, - "step": 114660 - }, - { - "epoch": 1.0137201860004597, - "grad_norm": 2.463782787322998, - "learning_rate": 3.3104663566659005e-05, - "loss": 0.7137, - "step": 114670 - }, - { - "epoch": 1.013808589260772, - "grad_norm": 3.714784622192383, - "learning_rate": 3.310319017898714e-05, - "loss": 0.5961, - "step": 114680 - }, - { - "epoch": 1.0138969925210841, - "grad_norm": 1.7615363597869873, - "learning_rate": 3.310171679131526e-05, - "loss": 0.7349, - "step": 114690 - }, - { - "epoch": 1.0139853957813965, - "grad_norm": 3.256420135498047, - "learning_rate": 3.3100243403643396e-05, - "loss": 0.6422, - "step": 114700 - }, - { - "epoch": 1.0140737990417086, - "grad_norm": 1.1652439832687378, - "learning_rate": 3.3098770015971525e-05, - "loss": 0.4512, - "step": 114710 - }, - { - "epoch": 1.014162202302021, - "grad_norm": 3.902275562286377, - "learning_rate": 3.309729662829965e-05, - "loss": 0.6091, - "step": 114720 - }, - { - "epoch": 1.014250605562333, - "grad_norm": 2.9751980304718018, - "learning_rate": 3.309582324062778e-05, - "loss": 0.7228, - "step": 114730 - }, - { - "epoch": 1.0143390088226454, - "grad_norm": 2.747573137283325, - "learning_rate": 3.3094349852955917e-05, - "loss": 0.6112, - "step": 114740 - }, - { - "epoch": 1.0144274120829577, - "grad_norm": 1.6531888246536255, - "learning_rate": 3.309287646528404e-05, - "loss": 0.5841, - "step": 114750 - }, - { - "epoch": 1.0145158153432698, - "grad_norm": 3.116415023803711, - "learning_rate": 3.309140307761217e-05, - "loss": 0.6197, - "step": 114760 - }, - { - "epoch": 1.0146042186035822, - "grad_norm": 6.16981840133667, - "learning_rate": 3.30899296899403e-05, - "loss": 0.584, - "step": 114770 - }, - { - "epoch": 1.0146926218638943, - "grad_norm": 2.065519332885742, - "learning_rate": 3.308845630226843e-05, - "loss": 0.5533, - "step": 114780 - }, - { - "epoch": 1.0147810251242066, - "grad_norm": 7.4151411056518555, - "learning_rate": 3.308698291459656e-05, - "loss": 0.623, - "step": 114790 - }, - { - "epoch": 1.0148694283845188, - "grad_norm": 1.7224183082580566, - "learning_rate": 3.3085509526924693e-05, - "loss": 0.6343, - "step": 114800 - }, - { - "epoch": 1.014957831644831, - "grad_norm": 4.719710826873779, - "learning_rate": 3.3084036139252815e-05, - "loss": 0.4838, - "step": 114810 - }, - { - "epoch": 1.0150462349051432, - "grad_norm": 2.6137685775756836, - "learning_rate": 3.308256275158095e-05, - "loss": 0.6428, - "step": 114820 - }, - { - "epoch": 1.0151346381654556, - "grad_norm": 1.3227020502090454, - "learning_rate": 3.308108936390907e-05, - "loss": 0.5614, - "step": 114830 - }, - { - "epoch": 1.0152230414257677, - "grad_norm": 9.94175910949707, - "learning_rate": 3.307961597623721e-05, - "loss": 0.8337, - "step": 114840 - }, - { - "epoch": 1.01531144468608, - "grad_norm": 13.124799728393555, - "learning_rate": 3.3078142588565335e-05, - "loss": 0.6158, - "step": 114850 - }, - { - "epoch": 1.0153998479463922, - "grad_norm": 2.885446786880493, - "learning_rate": 3.3076669200893464e-05, - "loss": 0.7418, - "step": 114860 - }, - { - "epoch": 1.0154882512067045, - "grad_norm": 5.281364917755127, - "learning_rate": 3.307519581322159e-05, - "loss": 0.5997, - "step": 114870 - }, - { - "epoch": 1.0155766544670168, - "grad_norm": 1.1485543251037598, - "learning_rate": 3.307372242554973e-05, - "loss": 0.6214, - "step": 114880 - }, - { - "epoch": 1.015665057727329, - "grad_norm": 1.0571945905685425, - "learning_rate": 3.307224903787785e-05, - "loss": 0.4867, - "step": 114890 - }, - { - "epoch": 1.0157534609876413, - "grad_norm": 9.413779258728027, - "learning_rate": 3.3070775650205984e-05, - "loss": 0.5908, - "step": 114900 - }, - { - "epoch": 1.0158418642479534, - "grad_norm": 1.4142274856567383, - "learning_rate": 3.306930226253411e-05, - "loss": 0.5757, - "step": 114910 - }, - { - "epoch": 1.0159302675082658, - "grad_norm": 6.0000457763671875, - "learning_rate": 3.306782887486224e-05, - "loss": 0.5736, - "step": 114920 - }, - { - "epoch": 1.0160186707685779, - "grad_norm": 4.925318241119385, - "learning_rate": 3.306635548719037e-05, - "loss": 0.5809, - "step": 114930 - }, - { - "epoch": 1.0161070740288902, - "grad_norm": 2.033148765563965, - "learning_rate": 3.30648820995185e-05, - "loss": 0.6067, - "step": 114940 - }, - { - "epoch": 1.0161954772892023, - "grad_norm": 3.5339834690093994, - "learning_rate": 3.3063408711846626e-05, - "loss": 0.5897, - "step": 114950 - }, - { - "epoch": 1.0162838805495147, - "grad_norm": 1.1234498023986816, - "learning_rate": 3.306193532417476e-05, - "loss": 0.5069, - "step": 114960 - }, - { - "epoch": 1.0163722838098268, - "grad_norm": 7.30183219909668, - "learning_rate": 3.306046193650288e-05, - "loss": 0.6961, - "step": 114970 - }, - { - "epoch": 1.0164606870701391, - "grad_norm": 4.576351642608643, - "learning_rate": 3.305898854883102e-05, - "loss": 0.7681, - "step": 114980 - }, - { - "epoch": 1.0165490903304515, - "grad_norm": 4.762503147125244, - "learning_rate": 3.3057515161159146e-05, - "loss": 0.6715, - "step": 114990 - }, - { - "epoch": 1.0166374935907636, - "grad_norm": 2.4711084365844727, - "learning_rate": 3.3056041773487274e-05, - "loss": 0.5581, - "step": 115000 - }, - { - "epoch": 1.016725896851076, - "grad_norm": 1.7345740795135498, - "learning_rate": 3.30545683858154e-05, - "loss": 0.6539, - "step": 115010 - }, - { - "epoch": 1.016814300111388, - "grad_norm": 2.8621976375579834, - "learning_rate": 3.305309499814354e-05, - "loss": 0.6463, - "step": 115020 - }, - { - "epoch": 1.0169027033717004, - "grad_norm": 2.12165904045105, - "learning_rate": 3.305162161047166e-05, - "loss": 0.5, - "step": 115030 - }, - { - "epoch": 1.0169911066320125, - "grad_norm": 0.9638975262641907, - "learning_rate": 3.3050148222799794e-05, - "loss": 0.5905, - "step": 115040 - }, - { - "epoch": 1.0170795098923249, - "grad_norm": 2.3649346828460693, - "learning_rate": 3.3048674835127916e-05, - "loss": 0.642, - "step": 115050 - }, - { - "epoch": 1.017167913152637, - "grad_norm": 1.0278958082199097, - "learning_rate": 3.304720144745605e-05, - "loss": 0.5289, - "step": 115060 - }, - { - "epoch": 1.0172563164129493, - "grad_norm": 2.411811351776123, - "learning_rate": 3.304572805978418e-05, - "loss": 0.5647, - "step": 115070 - }, - { - "epoch": 1.0173447196732615, - "grad_norm": 3.973703384399414, - "learning_rate": 3.304425467211231e-05, - "loss": 0.5368, - "step": 115080 - }, - { - "epoch": 1.0174331229335738, - "grad_norm": 2.5362212657928467, - "learning_rate": 3.3042781284440436e-05, - "loss": 0.7078, - "step": 115090 - }, - { - "epoch": 1.0175215261938861, - "grad_norm": 2.3636996746063232, - "learning_rate": 3.304130789676857e-05, - "loss": 0.7232, - "step": 115100 - }, - { - "epoch": 1.0176099294541983, - "grad_norm": 2.8480608463287354, - "learning_rate": 3.303983450909669e-05, - "loss": 0.6377, - "step": 115110 - }, - { - "epoch": 1.0176983327145106, - "grad_norm": 1.7566604614257812, - "learning_rate": 3.303836112142483e-05, - "loss": 0.6525, - "step": 115120 - }, - { - "epoch": 1.0177867359748227, - "grad_norm": 1.5804463624954224, - "learning_rate": 3.3036887733752956e-05, - "loss": 0.5934, - "step": 115130 - }, - { - "epoch": 1.017875139235135, - "grad_norm": 8.386014938354492, - "learning_rate": 3.3035414346081085e-05, - "loss": 0.595, - "step": 115140 - }, - { - "epoch": 1.0179635424954472, - "grad_norm": 2.0764265060424805, - "learning_rate": 3.303394095840921e-05, - "loss": 0.6004, - "step": 115150 - }, - { - "epoch": 1.0180519457557595, - "grad_norm": 2.2943568229675293, - "learning_rate": 3.303246757073734e-05, - "loss": 0.7777, - "step": 115160 - }, - { - "epoch": 1.0181403490160716, - "grad_norm": 0.6454117298126221, - "learning_rate": 3.303099418306547e-05, - "loss": 0.5751, - "step": 115170 - }, - { - "epoch": 1.018228752276384, - "grad_norm": 2.7356128692626953, - "learning_rate": 3.3029520795393605e-05, - "loss": 0.6796, - "step": 115180 - }, - { - "epoch": 1.018317155536696, - "grad_norm": 1.1771854162216187, - "learning_rate": 3.3028047407721726e-05, - "loss": 0.6093, - "step": 115190 - }, - { - "epoch": 1.0184055587970084, - "grad_norm": 2.6923511028289795, - "learning_rate": 3.302657402004986e-05, - "loss": 0.603, - "step": 115200 - }, - { - "epoch": 1.0184939620573206, - "grad_norm": 2.4413962364196777, - "learning_rate": 3.302510063237799e-05, - "loss": 0.6456, - "step": 115210 - }, - { - "epoch": 1.018582365317633, - "grad_norm": 1.724786639213562, - "learning_rate": 3.302362724470612e-05, - "loss": 0.5813, - "step": 115220 - }, - { - "epoch": 1.0186707685779453, - "grad_norm": 1.0470874309539795, - "learning_rate": 3.3022153857034247e-05, - "loss": 0.5502, - "step": 115230 - }, - { - "epoch": 1.0187591718382574, - "grad_norm": 2.4003427028656006, - "learning_rate": 3.302068046936238e-05, - "loss": 0.6398, - "step": 115240 - }, - { - "epoch": 1.0188475750985697, - "grad_norm": 9.031026840209961, - "learning_rate": 3.30192070816905e-05, - "loss": 0.644, - "step": 115250 - }, - { - "epoch": 1.0189359783588818, - "grad_norm": 1.9744430780410767, - "learning_rate": 3.301773369401864e-05, - "loss": 0.5874, - "step": 115260 - }, - { - "epoch": 1.0190243816191942, - "grad_norm": 3.2505557537078857, - "learning_rate": 3.301626030634677e-05, - "loss": 0.6277, - "step": 115270 - }, - { - "epoch": 1.0191127848795063, - "grad_norm": 3.8930466175079346, - "learning_rate": 3.3014786918674895e-05, - "loss": 0.6859, - "step": 115280 - }, - { - "epoch": 1.0192011881398186, - "grad_norm": 6.320791721343994, - "learning_rate": 3.3013313531003023e-05, - "loss": 0.5608, - "step": 115290 - }, - { - "epoch": 1.0192895914001308, - "grad_norm": 3.1180779933929443, - "learning_rate": 3.301184014333115e-05, - "loss": 0.6638, - "step": 115300 - }, - { - "epoch": 1.019377994660443, - "grad_norm": 2.680518627166748, - "learning_rate": 3.301036675565928e-05, - "loss": 0.6795, - "step": 115310 - }, - { - "epoch": 1.0194663979207552, - "grad_norm": 2.081961154937744, - "learning_rate": 3.3008893367987415e-05, - "loss": 0.6785, - "step": 115320 - }, - { - "epoch": 1.0195548011810676, - "grad_norm": 7.333011150360107, - "learning_rate": 3.3007419980315544e-05, - "loss": 0.6141, - "step": 115330 - }, - { - "epoch": 1.01964320444138, - "grad_norm": 1.7128896713256836, - "learning_rate": 3.300594659264367e-05, - "loss": 0.6972, - "step": 115340 - }, - { - "epoch": 1.019731607701692, - "grad_norm": 8.286114692687988, - "learning_rate": 3.30044732049718e-05, - "loss": 0.5715, - "step": 115350 - }, - { - "epoch": 1.0198200109620044, - "grad_norm": 0.9567068815231323, - "learning_rate": 3.300299981729993e-05, - "loss": 0.5846, - "step": 115360 - }, - { - "epoch": 1.0199084142223165, - "grad_norm": 0.8313459753990173, - "learning_rate": 3.300152642962806e-05, - "loss": 0.6588, - "step": 115370 - }, - { - "epoch": 1.0199968174826288, - "grad_norm": 1.2403515577316284, - "learning_rate": 3.300005304195619e-05, - "loss": 0.5241, - "step": 115380 - }, - { - "epoch": 1.020085220742941, - "grad_norm": 1.7476023435592651, - "learning_rate": 3.299857965428432e-05, - "loss": 0.5517, - "step": 115390 - }, - { - "epoch": 1.0201736240032533, - "grad_norm": 3.253467321395874, - "learning_rate": 3.299710626661245e-05, - "loss": 0.5874, - "step": 115400 - }, - { - "epoch": 1.0202620272635654, - "grad_norm": 3.416421413421631, - "learning_rate": 3.299563287894058e-05, - "loss": 0.6567, - "step": 115410 - }, - { - "epoch": 1.0203504305238777, - "grad_norm": 12.988905906677246, - "learning_rate": 3.2994159491268706e-05, - "loss": 0.577, - "step": 115420 - }, - { - "epoch": 1.0204388337841899, - "grad_norm": 3.3279874324798584, - "learning_rate": 3.2992686103596834e-05, - "loss": 0.472, - "step": 115430 - }, - { - "epoch": 1.0205272370445022, - "grad_norm": 8.408232688903809, - "learning_rate": 3.299121271592496e-05, - "loss": 0.6969, - "step": 115440 - }, - { - "epoch": 1.0206156403048143, - "grad_norm": 1.940561056137085, - "learning_rate": 3.29897393282531e-05, - "loss": 0.5783, - "step": 115450 - }, - { - "epoch": 1.0207040435651267, - "grad_norm": 3.3989226818084717, - "learning_rate": 3.2988265940581226e-05, - "loss": 0.6387, - "step": 115460 - }, - { - "epoch": 1.020792446825439, - "grad_norm": 2.806227684020996, - "learning_rate": 3.2986792552909354e-05, - "loss": 0.593, - "step": 115470 - }, - { - "epoch": 1.0208808500857511, - "grad_norm": 2.114802360534668, - "learning_rate": 3.298531916523748e-05, - "loss": 0.5895, - "step": 115480 - }, - { - "epoch": 1.0209692533460635, - "grad_norm": 1.941298246383667, - "learning_rate": 3.298384577756561e-05, - "loss": 0.5382, - "step": 115490 - }, - { - "epoch": 1.0210576566063756, - "grad_norm": 2.512268304824829, - "learning_rate": 3.298237238989374e-05, - "loss": 0.7258, - "step": 115500 - }, - { - "epoch": 1.021146059866688, - "grad_norm": 2.1646790504455566, - "learning_rate": 3.2980899002221874e-05, - "loss": 0.6303, - "step": 115510 - }, - { - "epoch": 1.021234463127, - "grad_norm": 3.8876559734344482, - "learning_rate": 3.2979425614549996e-05, - "loss": 0.5748, - "step": 115520 - }, - { - "epoch": 1.0213228663873124, - "grad_norm": 3.440377950668335, - "learning_rate": 3.297795222687813e-05, - "loss": 0.6175, - "step": 115530 - }, - { - "epoch": 1.0214112696476245, - "grad_norm": 5.650941371917725, - "learning_rate": 3.297647883920626e-05, - "loss": 0.6, - "step": 115540 - }, - { - "epoch": 1.0214996729079369, - "grad_norm": 1.6975969076156616, - "learning_rate": 3.297500545153439e-05, - "loss": 0.602, - "step": 115550 - }, - { - "epoch": 1.021588076168249, - "grad_norm": 2.118499279022217, - "learning_rate": 3.2973532063862516e-05, - "loss": 0.6747, - "step": 115560 - }, - { - "epoch": 1.0216764794285613, - "grad_norm": 2.2729036808013916, - "learning_rate": 3.297205867619065e-05, - "loss": 0.5729, - "step": 115570 - }, - { - "epoch": 1.0217648826888737, - "grad_norm": 5.56952428817749, - "learning_rate": 3.297058528851877e-05, - "loss": 0.575, - "step": 115580 - }, - { - "epoch": 1.0218532859491858, - "grad_norm": 2.339576244354248, - "learning_rate": 3.296911190084691e-05, - "loss": 0.6363, - "step": 115590 - }, - { - "epoch": 1.0219416892094981, - "grad_norm": 5.936741352081299, - "learning_rate": 3.2967638513175036e-05, - "loss": 0.6227, - "step": 115600 - }, - { - "epoch": 1.0220300924698102, - "grad_norm": 6.292945384979248, - "learning_rate": 3.2966165125503165e-05, - "loss": 0.6724, - "step": 115610 - }, - { - "epoch": 1.0221184957301226, - "grad_norm": 0.6593518853187561, - "learning_rate": 3.296469173783129e-05, - "loss": 0.5803, - "step": 115620 - }, - { - "epoch": 1.0222068989904347, - "grad_norm": 4.837062358856201, - "learning_rate": 3.296321835015942e-05, - "loss": 0.4487, - "step": 115630 - }, - { - "epoch": 1.022295302250747, - "grad_norm": 2.0359268188476562, - "learning_rate": 3.296174496248755e-05, - "loss": 0.5896, - "step": 115640 - }, - { - "epoch": 1.0223837055110592, - "grad_norm": 0.9177247285842896, - "learning_rate": 3.2960271574815685e-05, - "loss": 0.6012, - "step": 115650 - }, - { - "epoch": 1.0224721087713715, - "grad_norm": 2.142111301422119, - "learning_rate": 3.2958798187143806e-05, - "loss": 0.7178, - "step": 115660 - }, - { - "epoch": 1.0225605120316836, - "grad_norm": 1.4394023418426514, - "learning_rate": 3.295732479947194e-05, - "loss": 0.6407, - "step": 115670 - }, - { - "epoch": 1.022648915291996, - "grad_norm": 2.4677865505218506, - "learning_rate": 3.295585141180007e-05, - "loss": 0.5853, - "step": 115680 - }, - { - "epoch": 1.0227373185523083, - "grad_norm": 2.486605405807495, - "learning_rate": 3.29543780241282e-05, - "loss": 0.6149, - "step": 115690 - }, - { - "epoch": 1.0228257218126204, - "grad_norm": 2.982140064239502, - "learning_rate": 3.295290463645633e-05, - "loss": 0.5306, - "step": 115700 - }, - { - "epoch": 1.0229141250729328, - "grad_norm": 10.542850494384766, - "learning_rate": 3.295143124878446e-05, - "loss": 0.6284, - "step": 115710 - }, - { - "epoch": 1.023002528333245, - "grad_norm": 2.162801742553711, - "learning_rate": 3.294995786111258e-05, - "loss": 0.6244, - "step": 115720 - }, - { - "epoch": 1.0230909315935572, - "grad_norm": 2.2199089527130127, - "learning_rate": 3.294848447344072e-05, - "loss": 0.7403, - "step": 115730 - }, - { - "epoch": 1.0231793348538694, - "grad_norm": 1.796887755393982, - "learning_rate": 3.294701108576885e-05, - "loss": 0.8027, - "step": 115740 - }, - { - "epoch": 1.0232677381141817, - "grad_norm": 2.835404872894287, - "learning_rate": 3.2945537698096975e-05, - "loss": 0.6643, - "step": 115750 - }, - { - "epoch": 1.0233561413744938, - "grad_norm": 1.3645983934402466, - "learning_rate": 3.2944064310425104e-05, - "loss": 0.6417, - "step": 115760 - }, - { - "epoch": 1.0234445446348062, - "grad_norm": 3.714608669281006, - "learning_rate": 3.294259092275323e-05, - "loss": 0.6954, - "step": 115770 - }, - { - "epoch": 1.0235329478951183, - "grad_norm": 14.418932914733887, - "learning_rate": 3.294111753508136e-05, - "loss": 0.5623, - "step": 115780 - }, - { - "epoch": 1.0236213511554306, - "grad_norm": 4.085190773010254, - "learning_rate": 3.2939644147409495e-05, - "loss": 0.6271, - "step": 115790 - }, - { - "epoch": 1.0237097544157427, - "grad_norm": 1.3367069959640503, - "learning_rate": 3.293817075973762e-05, - "loss": 0.4774, - "step": 115800 - }, - { - "epoch": 1.023798157676055, - "grad_norm": 5.851768970489502, - "learning_rate": 3.293669737206575e-05, - "loss": 0.6517, - "step": 115810 - }, - { - "epoch": 1.0238865609363674, - "grad_norm": 1.6313979625701904, - "learning_rate": 3.293522398439388e-05, - "loss": 0.5888, - "step": 115820 - }, - { - "epoch": 1.0239749641966795, - "grad_norm": 1.676857352256775, - "learning_rate": 3.293375059672201e-05, - "loss": 0.6169, - "step": 115830 - }, - { - "epoch": 1.024063367456992, - "grad_norm": 9.19443416595459, - "learning_rate": 3.293227720905014e-05, - "loss": 0.7504, - "step": 115840 - }, - { - "epoch": 1.024151770717304, - "grad_norm": 4.627795219421387, - "learning_rate": 3.293080382137827e-05, - "loss": 0.7622, - "step": 115850 - }, - { - "epoch": 1.0242401739776164, - "grad_norm": 2.1353626251220703, - "learning_rate": 3.2929330433706394e-05, - "loss": 0.6792, - "step": 115860 - }, - { - "epoch": 1.0243285772379285, - "grad_norm": 1.4297456741333008, - "learning_rate": 3.292785704603453e-05, - "loss": 0.6547, - "step": 115870 - }, - { - "epoch": 1.0244169804982408, - "grad_norm": 4.945678234100342, - "learning_rate": 3.292638365836265e-05, - "loss": 0.5383, - "step": 115880 - }, - { - "epoch": 1.024505383758553, - "grad_norm": 1.5775309801101685, - "learning_rate": 3.2924910270690786e-05, - "loss": 0.5684, - "step": 115890 - }, - { - "epoch": 1.0245937870188653, - "grad_norm": 11.50912094116211, - "learning_rate": 3.2923436883018914e-05, - "loss": 0.6661, - "step": 115900 - }, - { - "epoch": 1.0246821902791774, - "grad_norm": 3.94026517868042, - "learning_rate": 3.292196349534704e-05, - "loss": 0.5846, - "step": 115910 - }, - { - "epoch": 1.0247705935394897, - "grad_norm": 1.2065709829330444, - "learning_rate": 3.292049010767517e-05, - "loss": 0.5183, - "step": 115920 - }, - { - "epoch": 1.024858996799802, - "grad_norm": 2.126706123352051, - "learning_rate": 3.2919016720003306e-05, - "loss": 0.6142, - "step": 115930 - }, - { - "epoch": 1.0249474000601142, - "grad_norm": 9.068276405334473, - "learning_rate": 3.291754333233143e-05, - "loss": 0.7343, - "step": 115940 - }, - { - "epoch": 1.0250358033204265, - "grad_norm": 1.9008930921554565, - "learning_rate": 3.291606994465956e-05, - "loss": 0.6975, - "step": 115950 - }, - { - "epoch": 1.0251242065807387, - "grad_norm": 2.7840113639831543, - "learning_rate": 3.291459655698769e-05, - "loss": 0.5029, - "step": 115960 - }, - { - "epoch": 1.025212609841051, - "grad_norm": 2.1668808460235596, - "learning_rate": 3.291312316931582e-05, - "loss": 0.5861, - "step": 115970 - }, - { - "epoch": 1.0253010131013631, - "grad_norm": 15.260984420776367, - "learning_rate": 3.291164978164395e-05, - "loss": 0.5989, - "step": 115980 - }, - { - "epoch": 1.0253894163616755, - "grad_norm": 3.3249385356903076, - "learning_rate": 3.2910176393972076e-05, - "loss": 0.6341, - "step": 115990 - }, - { - "epoch": 1.0254778196219876, - "grad_norm": 4.381968021392822, - "learning_rate": 3.2908703006300204e-05, - "loss": 0.5492, - "step": 116000 - }, - { - "epoch": 1.0255662228823, - "grad_norm": 2.3023040294647217, - "learning_rate": 3.290722961862834e-05, - "loss": 0.6113, - "step": 116010 - }, - { - "epoch": 1.025654626142612, - "grad_norm": 1.339086890220642, - "learning_rate": 3.290575623095646e-05, - "loss": 0.42, - "step": 116020 - }, - { - "epoch": 1.0257430294029244, - "grad_norm": 5.605476379394531, - "learning_rate": 3.2904282843284596e-05, - "loss": 0.6983, - "step": 116030 - }, - { - "epoch": 1.0258314326632365, - "grad_norm": 5.244485378265381, - "learning_rate": 3.2902809455612725e-05, - "loss": 0.6016, - "step": 116040 - }, - { - "epoch": 1.0259198359235489, - "grad_norm": 4.918577194213867, - "learning_rate": 3.290133606794085e-05, - "loss": 0.7356, - "step": 116050 - }, - { - "epoch": 1.0260082391838612, - "grad_norm": 9.049163818359375, - "learning_rate": 3.289986268026898e-05, - "loss": 0.6026, - "step": 116060 - }, - { - "epoch": 1.0260966424441733, - "grad_norm": 2.0767617225646973, - "learning_rate": 3.2898389292597116e-05, - "loss": 0.5932, - "step": 116070 - }, - { - "epoch": 1.0261850457044857, - "grad_norm": 2.3189079761505127, - "learning_rate": 3.289691590492524e-05, - "loss": 0.6042, - "step": 116080 - }, - { - "epoch": 1.0262734489647978, - "grad_norm": 1.4300647974014282, - "learning_rate": 3.289544251725337e-05, - "loss": 0.7043, - "step": 116090 - }, - { - "epoch": 1.0263618522251101, - "grad_norm": 2.61091947555542, - "learning_rate": 3.2893969129581495e-05, - "loss": 0.5071, - "step": 116100 - }, - { - "epoch": 1.0264502554854222, - "grad_norm": 3.349518060684204, - "learning_rate": 3.289249574190963e-05, - "loss": 0.5487, - "step": 116110 - }, - { - "epoch": 1.0265386587457346, - "grad_norm": 2.2534759044647217, - "learning_rate": 3.289102235423776e-05, - "loss": 0.6715, - "step": 116120 - }, - { - "epoch": 1.0266270620060467, - "grad_norm": 6.956655025482178, - "learning_rate": 3.2889548966565887e-05, - "loss": 0.5354, - "step": 116130 - }, - { - "epoch": 1.026715465266359, - "grad_norm": 1.267991304397583, - "learning_rate": 3.2888075578894015e-05, - "loss": 0.5453, - "step": 116140 - }, - { - "epoch": 1.0268038685266712, - "grad_norm": 4.1753010749816895, - "learning_rate": 3.288660219122215e-05, - "loss": 0.6389, - "step": 116150 - }, - { - "epoch": 1.0268922717869835, - "grad_norm": 2.0763416290283203, - "learning_rate": 3.288512880355027e-05, - "loss": 0.5779, - "step": 116160 - }, - { - "epoch": 1.0269806750472958, - "grad_norm": 1.9550834894180298, - "learning_rate": 3.288365541587841e-05, - "loss": 0.6679, - "step": 116170 - }, - { - "epoch": 1.027069078307608, - "grad_norm": 2.5839273929595947, - "learning_rate": 3.2882182028206535e-05, - "loss": 0.65, - "step": 116180 - }, - { - "epoch": 1.0271574815679203, - "grad_norm": 49.472232818603516, - "learning_rate": 3.2880708640534663e-05, - "loss": 0.7485, - "step": 116190 - }, - { - "epoch": 1.0272458848282324, - "grad_norm": 1.6635946035385132, - "learning_rate": 3.287923525286279e-05, - "loss": 0.5659, - "step": 116200 - }, - { - "epoch": 1.0273342880885448, - "grad_norm": 1.6751060485839844, - "learning_rate": 3.287776186519093e-05, - "loss": 0.5833, - "step": 116210 - }, - { - "epoch": 1.0274226913488569, - "grad_norm": 5.592175006866455, - "learning_rate": 3.287628847751905e-05, - "loss": 0.5921, - "step": 116220 - }, - { - "epoch": 1.0275110946091692, - "grad_norm": 2.4472837448120117, - "learning_rate": 3.2874815089847184e-05, - "loss": 0.7164, - "step": 116230 - }, - { - "epoch": 1.0275994978694813, - "grad_norm": 3.849940776824951, - "learning_rate": 3.287334170217531e-05, - "loss": 0.604, - "step": 116240 - }, - { - "epoch": 1.0276879011297937, - "grad_norm": 6.839824676513672, - "learning_rate": 3.287186831450344e-05, - "loss": 0.592, - "step": 116250 - }, - { - "epoch": 1.0277763043901058, - "grad_norm": 12.123258590698242, - "learning_rate": 3.287039492683157e-05, - "loss": 0.802, - "step": 116260 - }, - { - "epoch": 1.0278647076504182, - "grad_norm": 3.9698305130004883, - "learning_rate": 3.28689215391597e-05, - "loss": 0.6043, - "step": 116270 - }, - { - "epoch": 1.0279531109107305, - "grad_norm": 4.2516632080078125, - "learning_rate": 3.2867448151487825e-05, - "loss": 0.6398, - "step": 116280 - }, - { - "epoch": 1.0280415141710426, - "grad_norm": 5.811633110046387, - "learning_rate": 3.286597476381596e-05, - "loss": 0.635, - "step": 116290 - }, - { - "epoch": 1.028129917431355, - "grad_norm": 3.5544073581695557, - "learning_rate": 3.286450137614409e-05, - "loss": 0.6762, - "step": 116300 - }, - { - "epoch": 1.028218320691667, - "grad_norm": 8.247230529785156, - "learning_rate": 3.286302798847222e-05, - "loss": 0.7066, - "step": 116310 - }, - { - "epoch": 1.0283067239519794, - "grad_norm": 2.4690587520599365, - "learning_rate": 3.2861554600800346e-05, - "loss": 0.5071, - "step": 116320 - }, - { - "epoch": 1.0283951272122915, - "grad_norm": 1.889485239982605, - "learning_rate": 3.2860081213128474e-05, - "loss": 0.6758, - "step": 116330 - }, - { - "epoch": 1.0284835304726039, - "grad_norm": 3.0156872272491455, - "learning_rate": 3.28586078254566e-05, - "loss": 0.6799, - "step": 116340 - }, - { - "epoch": 1.028571933732916, - "grad_norm": 3.3365800380706787, - "learning_rate": 3.285713443778473e-05, - "loss": 0.6223, - "step": 116350 - }, - { - "epoch": 1.0286603369932283, - "grad_norm": 25.31796646118164, - "learning_rate": 3.2855661050112866e-05, - "loss": 0.6178, - "step": 116360 - }, - { - "epoch": 1.0287487402535405, - "grad_norm": 4.635453701019287, - "learning_rate": 3.2854187662440994e-05, - "loss": 0.6073, - "step": 116370 - }, - { - "epoch": 1.0288371435138528, - "grad_norm": 1.779373288154602, - "learning_rate": 3.285271427476912e-05, - "loss": 0.6033, - "step": 116380 - }, - { - "epoch": 1.028925546774165, - "grad_norm": 3.0340542793273926, - "learning_rate": 3.285124088709725e-05, - "loss": 0.6538, - "step": 116390 - }, - { - "epoch": 1.0290139500344773, - "grad_norm": 1.6494413614273071, - "learning_rate": 3.284976749942538e-05, - "loss": 0.649, - "step": 116400 - }, - { - "epoch": 1.0291023532947896, - "grad_norm": 9.002840995788574, - "learning_rate": 3.284829411175351e-05, - "loss": 0.6207, - "step": 116410 - }, - { - "epoch": 1.0291907565551017, - "grad_norm": 1.5056276321411133, - "learning_rate": 3.284682072408164e-05, - "loss": 0.68, - "step": 116420 - }, - { - "epoch": 1.029279159815414, - "grad_norm": 4.635558605194092, - "learning_rate": 3.284534733640977e-05, - "loss": 0.6925, - "step": 116430 - }, - { - "epoch": 1.0293675630757262, - "grad_norm": 11.118827819824219, - "learning_rate": 3.28438739487379e-05, - "loss": 0.4654, - "step": 116440 - }, - { - "epoch": 1.0294559663360385, - "grad_norm": 13.458061218261719, - "learning_rate": 3.284240056106603e-05, - "loss": 0.6874, - "step": 116450 - }, - { - "epoch": 1.0295443695963507, - "grad_norm": 12.226763725280762, - "learning_rate": 3.2840927173394156e-05, - "loss": 0.6744, - "step": 116460 - }, - { - "epoch": 1.029632772856663, - "grad_norm": 9.540175437927246, - "learning_rate": 3.2839453785722284e-05, - "loss": 0.5955, - "step": 116470 - }, - { - "epoch": 1.0297211761169751, - "grad_norm": 9.096881866455078, - "learning_rate": 3.283798039805042e-05, - "loss": 0.4888, - "step": 116480 - }, - { - "epoch": 1.0298095793772875, - "grad_norm": 4.424280166625977, - "learning_rate": 3.283650701037854e-05, - "loss": 0.4796, - "step": 116490 - }, - { - "epoch": 1.0298979826375996, - "grad_norm": 2.7132251262664795, - "learning_rate": 3.2835033622706676e-05, - "loss": 0.5185, - "step": 116500 - }, - { - "epoch": 1.029986385897912, - "grad_norm": 1.077351689338684, - "learning_rate": 3.2833560235034805e-05, - "loss": 0.5455, - "step": 116510 - }, - { - "epoch": 1.0300747891582243, - "grad_norm": 2.441951036453247, - "learning_rate": 3.283208684736293e-05, - "loss": 0.624, - "step": 116520 - }, - { - "epoch": 1.0301631924185364, - "grad_norm": 3.245570421218872, - "learning_rate": 3.283061345969106e-05, - "loss": 0.6914, - "step": 116530 - }, - { - "epoch": 1.0302515956788487, - "grad_norm": 2.5124316215515137, - "learning_rate": 3.2829140072019196e-05, - "loss": 0.6638, - "step": 116540 - }, - { - "epoch": 1.0303399989391608, - "grad_norm": 2.594841241836548, - "learning_rate": 3.282766668434732e-05, - "loss": 0.6965, - "step": 116550 - }, - { - "epoch": 1.0304284021994732, - "grad_norm": 1.13369882106781, - "learning_rate": 3.282619329667545e-05, - "loss": 0.557, - "step": 116560 - }, - { - "epoch": 1.0305168054597853, - "grad_norm": 2.8408596515655518, - "learning_rate": 3.2824719909003575e-05, - "loss": 0.6678, - "step": 116570 - }, - { - "epoch": 1.0306052087200976, - "grad_norm": 0.7319878339767456, - "learning_rate": 3.282324652133171e-05, - "loss": 0.5983, - "step": 116580 - }, - { - "epoch": 1.0306936119804098, - "grad_norm": 4.826745510101318, - "learning_rate": 3.282177313365984e-05, - "loss": 0.5915, - "step": 116590 - }, - { - "epoch": 1.030782015240722, - "grad_norm": 3.338949680328369, - "learning_rate": 3.2820299745987967e-05, - "loss": 0.666, - "step": 116600 - }, - { - "epoch": 1.0308704185010342, - "grad_norm": 12.315072059631348, - "learning_rate": 3.2818826358316095e-05, - "loss": 0.6548, - "step": 116610 - }, - { - "epoch": 1.0309588217613466, - "grad_norm": 1.5388741493225098, - "learning_rate": 3.281735297064423e-05, - "loss": 0.5622, - "step": 116620 - }, - { - "epoch": 1.0310472250216587, - "grad_norm": 1.8703491687774658, - "learning_rate": 3.281587958297235e-05, - "loss": 0.5399, - "step": 116630 - }, - { - "epoch": 1.031135628281971, - "grad_norm": 1.3267319202423096, - "learning_rate": 3.281440619530049e-05, - "loss": 0.6587, - "step": 116640 - }, - { - "epoch": 1.0312240315422834, - "grad_norm": 5.569469451904297, - "learning_rate": 3.2812932807628615e-05, - "loss": 0.5705, - "step": 116650 - }, - { - "epoch": 1.0313124348025955, - "grad_norm": 3.2137794494628906, - "learning_rate": 3.2811459419956743e-05, - "loss": 0.4815, - "step": 116660 - }, - { - "epoch": 1.0314008380629078, - "grad_norm": 2.425694227218628, - "learning_rate": 3.280998603228487e-05, - "loss": 0.5265, - "step": 116670 - }, - { - "epoch": 1.03148924132322, - "grad_norm": 3.39157772064209, - "learning_rate": 3.280851264461301e-05, - "loss": 0.7208, - "step": 116680 - }, - { - "epoch": 1.0315776445835323, - "grad_norm": 2.22771954536438, - "learning_rate": 3.280703925694113e-05, - "loss": 0.7884, - "step": 116690 - }, - { - "epoch": 1.0316660478438444, - "grad_norm": 3.204160451889038, - "learning_rate": 3.2805565869269264e-05, - "loss": 0.6452, - "step": 116700 - }, - { - "epoch": 1.0317544511041568, - "grad_norm": 4.634530544281006, - "learning_rate": 3.2804092481597385e-05, - "loss": 0.559, - "step": 116710 - }, - { - "epoch": 1.0318428543644689, - "grad_norm": 2.596842050552368, - "learning_rate": 3.280261909392552e-05, - "loss": 0.6561, - "step": 116720 - }, - { - "epoch": 1.0319312576247812, - "grad_norm": 1.9425549507141113, - "learning_rate": 3.280114570625365e-05, - "loss": 0.6915, - "step": 116730 - }, - { - "epoch": 1.0320196608850933, - "grad_norm": 2.7140884399414062, - "learning_rate": 3.279967231858178e-05, - "loss": 0.6119, - "step": 116740 - }, - { - "epoch": 1.0321080641454057, - "grad_norm": 1.9476137161254883, - "learning_rate": 3.2798198930909905e-05, - "loss": 0.7827, - "step": 116750 - }, - { - "epoch": 1.032196467405718, - "grad_norm": 11.106419563293457, - "learning_rate": 3.279672554323804e-05, - "loss": 0.5986, - "step": 116760 - }, - { - "epoch": 1.0322848706660301, - "grad_norm": 2.127530336380005, - "learning_rate": 3.279525215556616e-05, - "loss": 0.5492, - "step": 116770 - }, - { - "epoch": 1.0323732739263425, - "grad_norm": 4.33651065826416, - "learning_rate": 3.27937787678943e-05, - "loss": 0.7341, - "step": 116780 - }, - { - "epoch": 1.0324616771866546, - "grad_norm": 2.457453966140747, - "learning_rate": 3.2792305380222426e-05, - "loss": 0.4417, - "step": 116790 - }, - { - "epoch": 1.032550080446967, - "grad_norm": 3.453371524810791, - "learning_rate": 3.2790831992550554e-05, - "loss": 0.7319, - "step": 116800 - }, - { - "epoch": 1.032638483707279, - "grad_norm": 4.092473030090332, - "learning_rate": 3.278935860487868e-05, - "loss": 0.5708, - "step": 116810 - }, - { - "epoch": 1.0327268869675914, - "grad_norm": 1.7105004787445068, - "learning_rate": 3.278788521720681e-05, - "loss": 0.5989, - "step": 116820 - }, - { - "epoch": 1.0328152902279035, - "grad_norm": 1.922426462173462, - "learning_rate": 3.278641182953494e-05, - "loss": 0.6356, - "step": 116830 - }, - { - "epoch": 1.0329036934882159, - "grad_norm": 2.488189935684204, - "learning_rate": 3.2784938441863074e-05, - "loss": 0.6216, - "step": 116840 - }, - { - "epoch": 1.032992096748528, - "grad_norm": 1.9522022008895874, - "learning_rate": 3.2783465054191196e-05, - "loss": 0.6034, - "step": 116850 - }, - { - "epoch": 1.0330805000088403, - "grad_norm": 2.601702928543091, - "learning_rate": 3.278199166651933e-05, - "loss": 0.5684, - "step": 116860 - }, - { - "epoch": 1.0331689032691527, - "grad_norm": 6.249575614929199, - "learning_rate": 3.278051827884746e-05, - "loss": 0.6326, - "step": 116870 - }, - { - "epoch": 1.0332573065294648, - "grad_norm": 20.06865692138672, - "learning_rate": 3.277904489117559e-05, - "loss": 0.7556, - "step": 116880 - }, - { - "epoch": 1.0333457097897771, - "grad_norm": 12.318852424621582, - "learning_rate": 3.2777571503503716e-05, - "loss": 0.5879, - "step": 116890 - }, - { - "epoch": 1.0334341130500893, - "grad_norm": 1.2416291236877441, - "learning_rate": 3.277609811583185e-05, - "loss": 0.6813, - "step": 116900 - }, - { - "epoch": 1.0335225163104016, - "grad_norm": 1.846293330192566, - "learning_rate": 3.277462472815997e-05, - "loss": 0.6278, - "step": 116910 - }, - { - "epoch": 1.0336109195707137, - "grad_norm": 1.836117148399353, - "learning_rate": 3.277315134048811e-05, - "loss": 0.5509, - "step": 116920 - }, - { - "epoch": 1.033699322831026, - "grad_norm": 4.629387855529785, - "learning_rate": 3.277167795281623e-05, - "loss": 0.5082, - "step": 116930 - }, - { - "epoch": 1.0337877260913382, - "grad_norm": 3.5430023670196533, - "learning_rate": 3.2770204565144365e-05, - "loss": 0.5197, - "step": 116940 - }, - { - "epoch": 1.0338761293516505, - "grad_norm": 1.8582254648208618, - "learning_rate": 3.276873117747249e-05, - "loss": 0.671, - "step": 116950 - }, - { - "epoch": 1.0339645326119626, - "grad_norm": 1.6037861108779907, - "learning_rate": 3.276725778980062e-05, - "loss": 0.6472, - "step": 116960 - }, - { - "epoch": 1.034052935872275, - "grad_norm": 2.811312198638916, - "learning_rate": 3.276578440212875e-05, - "loss": 0.5694, - "step": 116970 - }, - { - "epoch": 1.0341413391325873, - "grad_norm": 3.180572271347046, - "learning_rate": 3.2764311014456885e-05, - "loss": 0.6234, - "step": 116980 - }, - { - "epoch": 1.0342297423928994, - "grad_norm": 1.1608598232269287, - "learning_rate": 3.2762837626785006e-05, - "loss": 0.6262, - "step": 116990 - }, - { - "epoch": 1.0343181456532118, - "grad_norm": 1.9698179960250854, - "learning_rate": 3.276136423911314e-05, - "loss": 0.5939, - "step": 117000 - }, - { - "epoch": 1.034406548913524, - "grad_norm": 6.817419052124023, - "learning_rate": 3.275989085144127e-05, - "loss": 0.6763, - "step": 117010 - }, - { - "epoch": 1.0344949521738362, - "grad_norm": 1.9949711561203003, - "learning_rate": 3.27584174637694e-05, - "loss": 0.5865, - "step": 117020 - }, - { - "epoch": 1.0345833554341484, - "grad_norm": 2.875236988067627, - "learning_rate": 3.2756944076097526e-05, - "loss": 0.6031, - "step": 117030 - }, - { - "epoch": 1.0346717586944607, - "grad_norm": 0.9283199310302734, - "learning_rate": 3.275547068842566e-05, - "loss": 0.6082, - "step": 117040 - }, - { - "epoch": 1.0347601619547728, - "grad_norm": 4.239526748657227, - "learning_rate": 3.275399730075378e-05, - "loss": 0.6346, - "step": 117050 - }, - { - "epoch": 1.0348485652150852, - "grad_norm": 0.9630407094955444, - "learning_rate": 3.275252391308192e-05, - "loss": 0.5952, - "step": 117060 - }, - { - "epoch": 1.0349369684753973, - "grad_norm": 1.790809988975525, - "learning_rate": 3.275105052541004e-05, - "loss": 0.6504, - "step": 117070 - }, - { - "epoch": 1.0350253717357096, - "grad_norm": 1.949729084968567, - "learning_rate": 3.2749577137738175e-05, - "loss": 0.6687, - "step": 117080 - }, - { - "epoch": 1.0351137749960218, - "grad_norm": 5.460433006286621, - "learning_rate": 3.27481037500663e-05, - "loss": 0.6907, - "step": 117090 - }, - { - "epoch": 1.035202178256334, - "grad_norm": 2.627575635910034, - "learning_rate": 3.274663036239443e-05, - "loss": 0.6784, - "step": 117100 - }, - { - "epoch": 1.0352905815166464, - "grad_norm": 11.56816577911377, - "learning_rate": 3.274515697472256e-05, - "loss": 0.7326, - "step": 117110 - }, - { - "epoch": 1.0353789847769586, - "grad_norm": 1.409606695175171, - "learning_rate": 3.2743683587050695e-05, - "loss": 0.5791, - "step": 117120 - }, - { - "epoch": 1.035467388037271, - "grad_norm": 1.8347712755203247, - "learning_rate": 3.274221019937882e-05, - "loss": 0.7242, - "step": 117130 - }, - { - "epoch": 1.035555791297583, - "grad_norm": 4.0566277503967285, - "learning_rate": 3.274073681170695e-05, - "loss": 0.7123, - "step": 117140 - }, - { - "epoch": 1.0356441945578954, - "grad_norm": 2.90889048576355, - "learning_rate": 3.273926342403508e-05, - "loss": 0.5579, - "step": 117150 - }, - { - "epoch": 1.0357325978182075, - "grad_norm": 1.5285141468048096, - "learning_rate": 3.273779003636321e-05, - "loss": 0.5843, - "step": 117160 - }, - { - "epoch": 1.0358210010785198, - "grad_norm": 2.2421305179595947, - "learning_rate": 3.273631664869134e-05, - "loss": 0.7018, - "step": 117170 - }, - { - "epoch": 1.035909404338832, - "grad_norm": 3.8225080966949463, - "learning_rate": 3.2734843261019465e-05, - "loss": 0.6318, - "step": 117180 - }, - { - "epoch": 1.0359978075991443, - "grad_norm": 8.156444549560547, - "learning_rate": 3.2733369873347594e-05, - "loss": 0.6397, - "step": 117190 - }, - { - "epoch": 1.0360862108594564, - "grad_norm": 7.463613510131836, - "learning_rate": 3.273189648567573e-05, - "loss": 0.6656, - "step": 117200 - }, - { - "epoch": 1.0361746141197687, - "grad_norm": 1.971327543258667, - "learning_rate": 3.273042309800386e-05, - "loss": 0.551, - "step": 117210 - }, - { - "epoch": 1.0362630173800809, - "grad_norm": 1.6811615228652954, - "learning_rate": 3.2728949710331986e-05, - "loss": 0.642, - "step": 117220 - }, - { - "epoch": 1.0363514206403932, - "grad_norm": 2.060220241546631, - "learning_rate": 3.2727476322660114e-05, - "loss": 0.6732, - "step": 117230 - }, - { - "epoch": 1.0364398239007055, - "grad_norm": 1.6338359117507935, - "learning_rate": 3.272600293498824e-05, - "loss": 0.6826, - "step": 117240 - }, - { - "epoch": 1.0365282271610177, - "grad_norm": 2.3002490997314453, - "learning_rate": 3.272452954731637e-05, - "loss": 0.576, - "step": 117250 - }, - { - "epoch": 1.03661663042133, - "grad_norm": 1.0325394868850708, - "learning_rate": 3.2723056159644506e-05, - "loss": 0.6372, - "step": 117260 - }, - { - "epoch": 1.0367050336816421, - "grad_norm": 1.868433952331543, - "learning_rate": 3.2721582771972634e-05, - "loss": 0.6853, - "step": 117270 - }, - { - "epoch": 1.0367934369419545, - "grad_norm": 1.6474049091339111, - "learning_rate": 3.272010938430076e-05, - "loss": 0.5646, - "step": 117280 - }, - { - "epoch": 1.0368818402022666, - "grad_norm": 2.120286703109741, - "learning_rate": 3.271863599662889e-05, - "loss": 0.6415, - "step": 117290 - }, - { - "epoch": 1.036970243462579, - "grad_norm": 3.433964729309082, - "learning_rate": 3.271716260895702e-05, - "loss": 0.6083, - "step": 117300 - }, - { - "epoch": 1.037058646722891, - "grad_norm": 3.581944704055786, - "learning_rate": 3.271568922128515e-05, - "loss": 0.5799, - "step": 117310 - }, - { - "epoch": 1.0371470499832034, - "grad_norm": 3.5881195068359375, - "learning_rate": 3.2714215833613276e-05, - "loss": 0.518, - "step": 117320 - }, - { - "epoch": 1.0372354532435155, - "grad_norm": 5.122342586517334, - "learning_rate": 3.271274244594141e-05, - "loss": 0.6346, - "step": 117330 - }, - { - "epoch": 1.0373238565038279, - "grad_norm": 5.392604351043701, - "learning_rate": 3.271126905826954e-05, - "loss": 0.6113, - "step": 117340 - }, - { - "epoch": 1.0374122597641402, - "grad_norm": 4.673681259155273, - "learning_rate": 3.270979567059767e-05, - "loss": 0.5359, - "step": 117350 - }, - { - "epoch": 1.0375006630244523, - "grad_norm": 3.5873610973358154, - "learning_rate": 3.2708322282925796e-05, - "loss": 0.6046, - "step": 117360 - }, - { - "epoch": 1.0375890662847647, - "grad_norm": 16.325695037841797, - "learning_rate": 3.2706848895253924e-05, - "loss": 0.6831, - "step": 117370 - }, - { - "epoch": 1.0376774695450768, - "grad_norm": 2.7966771125793457, - "learning_rate": 3.270537550758205e-05, - "loss": 0.667, - "step": 117380 - }, - { - "epoch": 1.0377658728053891, - "grad_norm": 1.5073250532150269, - "learning_rate": 3.270390211991019e-05, - "loss": 0.589, - "step": 117390 - }, - { - "epoch": 1.0378542760657012, - "grad_norm": 8.531837463378906, - "learning_rate": 3.270242873223831e-05, - "loss": 0.5831, - "step": 117400 - }, - { - "epoch": 1.0379426793260136, - "grad_norm": 7.171672821044922, - "learning_rate": 3.2700955344566445e-05, - "loss": 0.6154, - "step": 117410 - }, - { - "epoch": 1.0380310825863257, - "grad_norm": 10.264817237854004, - "learning_rate": 3.269948195689457e-05, - "loss": 0.7075, - "step": 117420 - }, - { - "epoch": 1.038119485846638, - "grad_norm": 4.6835713386535645, - "learning_rate": 3.26980085692227e-05, - "loss": 0.582, - "step": 117430 - }, - { - "epoch": 1.0382078891069502, - "grad_norm": 1.2939256429672241, - "learning_rate": 3.269653518155083e-05, - "loss": 0.6365, - "step": 117440 - }, - { - "epoch": 1.0382962923672625, - "grad_norm": 3.343648672103882, - "learning_rate": 3.2695061793878965e-05, - "loss": 0.7446, - "step": 117450 - }, - { - "epoch": 1.0383846956275748, - "grad_norm": 3.8489580154418945, - "learning_rate": 3.2693588406207086e-05, - "loss": 0.6648, - "step": 117460 - }, - { - "epoch": 1.038473098887887, - "grad_norm": 4.065408229827881, - "learning_rate": 3.269211501853522e-05, - "loss": 0.4903, - "step": 117470 - }, - { - "epoch": 1.0385615021481993, - "grad_norm": 11.971077919006348, - "learning_rate": 3.269064163086335e-05, - "loss": 0.7453, - "step": 117480 - }, - { - "epoch": 1.0386499054085114, - "grad_norm": 2.102881669998169, - "learning_rate": 3.268916824319148e-05, - "loss": 0.5602, - "step": 117490 - }, - { - "epoch": 1.0387383086688238, - "grad_norm": 2.289872646331787, - "learning_rate": 3.2687694855519607e-05, - "loss": 0.6237, - "step": 117500 - }, - { - "epoch": 1.038826711929136, - "grad_norm": 1.8100292682647705, - "learning_rate": 3.268622146784774e-05, - "loss": 0.6389, - "step": 117510 - }, - { - "epoch": 1.0389151151894482, - "grad_norm": 2.585407018661499, - "learning_rate": 3.268474808017586e-05, - "loss": 0.5156, - "step": 117520 - }, - { - "epoch": 1.0390035184497604, - "grad_norm": 23.623857498168945, - "learning_rate": 3.2683274692504e-05, - "loss": 0.5978, - "step": 117530 - }, - { - "epoch": 1.0390919217100727, - "grad_norm": 1.6409337520599365, - "learning_rate": 3.268180130483212e-05, - "loss": 0.6086, - "step": 117540 - }, - { - "epoch": 1.0391803249703848, - "grad_norm": 1.6514098644256592, - "learning_rate": 3.2680327917160255e-05, - "loss": 0.6996, - "step": 117550 - }, - { - "epoch": 1.0392687282306972, - "grad_norm": 4.418907642364502, - "learning_rate": 3.2678854529488383e-05, - "loss": 0.5801, - "step": 117560 - }, - { - "epoch": 1.0393571314910095, - "grad_norm": 2.738635540008545, - "learning_rate": 3.267738114181651e-05, - "loss": 0.4338, - "step": 117570 - }, - { - "epoch": 1.0394455347513216, - "grad_norm": 1.4246604442596436, - "learning_rate": 3.267590775414464e-05, - "loss": 0.6458, - "step": 117580 - }, - { - "epoch": 1.039533938011634, - "grad_norm": 1.2119765281677246, - "learning_rate": 3.2674434366472775e-05, - "loss": 0.6196, - "step": 117590 - }, - { - "epoch": 1.039622341271946, - "grad_norm": 1.0284359455108643, - "learning_rate": 3.26729609788009e-05, - "loss": 0.5413, - "step": 117600 - }, - { - "epoch": 1.0397107445322584, - "grad_norm": 4.031009197235107, - "learning_rate": 3.267148759112903e-05, - "loss": 0.7052, - "step": 117610 - }, - { - "epoch": 1.0397991477925705, - "grad_norm": 1.655850887298584, - "learning_rate": 3.267001420345716e-05, - "loss": 0.5698, - "step": 117620 - }, - { - "epoch": 1.0398875510528829, - "grad_norm": 33.166297912597656, - "learning_rate": 3.266854081578529e-05, - "loss": 0.7216, - "step": 117630 - }, - { - "epoch": 1.039975954313195, - "grad_norm": 1.8915823698043823, - "learning_rate": 3.266706742811342e-05, - "loss": 0.7542, - "step": 117640 - }, - { - "epoch": 1.0400643575735073, - "grad_norm": 1.5253355503082275, - "learning_rate": 3.2665594040441545e-05, - "loss": 0.6712, - "step": 117650 - }, - { - "epoch": 1.0401527608338195, - "grad_norm": 3.1630632877349854, - "learning_rate": 3.2664120652769674e-05, - "loss": 0.656, - "step": 117660 - }, - { - "epoch": 1.0402411640941318, - "grad_norm": 4.646976470947266, - "learning_rate": 3.266264726509781e-05, - "loss": 0.6421, - "step": 117670 - }, - { - "epoch": 1.040329567354444, - "grad_norm": 1.5237339735031128, - "learning_rate": 3.266117387742593e-05, - "loss": 0.6431, - "step": 117680 - }, - { - "epoch": 1.0404179706147563, - "grad_norm": 1.745900273323059, - "learning_rate": 3.2659700489754066e-05, - "loss": 0.4945, - "step": 117690 - }, - { - "epoch": 1.0405063738750686, - "grad_norm": 4.8196892738342285, - "learning_rate": 3.2658227102082194e-05, - "loss": 0.5877, - "step": 117700 - }, - { - "epoch": 1.0405947771353807, - "grad_norm": 3.6238794326782227, - "learning_rate": 3.265675371441032e-05, - "loss": 0.7393, - "step": 117710 - }, - { - "epoch": 1.040683180395693, - "grad_norm": 6.0012664794921875, - "learning_rate": 3.265528032673845e-05, - "loss": 0.7391, - "step": 117720 - }, - { - "epoch": 1.0407715836560052, - "grad_norm": 4.358944892883301, - "learning_rate": 3.2653806939066586e-05, - "loss": 0.6603, - "step": 117730 - }, - { - "epoch": 1.0408599869163175, - "grad_norm": 7.1416826248168945, - "learning_rate": 3.265233355139471e-05, - "loss": 0.6724, - "step": 117740 - }, - { - "epoch": 1.0409483901766297, - "grad_norm": 8.007926940917969, - "learning_rate": 3.265086016372284e-05, - "loss": 0.6326, - "step": 117750 - }, - { - "epoch": 1.041036793436942, - "grad_norm": 2.640085458755493, - "learning_rate": 3.2649386776050964e-05, - "loss": 0.529, - "step": 117760 - }, - { - "epoch": 1.0411251966972541, - "grad_norm": 2.1877005100250244, - "learning_rate": 3.26479133883791e-05, - "loss": 0.596, - "step": 117770 - }, - { - "epoch": 1.0412135999575665, - "grad_norm": 1.1778029203414917, - "learning_rate": 3.264644000070723e-05, - "loss": 0.6291, - "step": 117780 - }, - { - "epoch": 1.0413020032178786, - "grad_norm": 4.867035388946533, - "learning_rate": 3.2644966613035356e-05, - "loss": 0.6043, - "step": 117790 - }, - { - "epoch": 1.041390406478191, - "grad_norm": 3.264575719833374, - "learning_rate": 3.2643493225363484e-05, - "loss": 0.5568, - "step": 117800 - }, - { - "epoch": 1.041478809738503, - "grad_norm": 1.502008318901062, - "learning_rate": 3.264201983769162e-05, - "loss": 0.581, - "step": 117810 - }, - { - "epoch": 1.0415672129988154, - "grad_norm": 9.617237091064453, - "learning_rate": 3.264054645001974e-05, - "loss": 0.7682, - "step": 117820 - }, - { - "epoch": 1.0416556162591277, - "grad_norm": 1.3119839429855347, - "learning_rate": 3.2639073062347876e-05, - "loss": 0.6426, - "step": 117830 - }, - { - "epoch": 1.0417440195194398, - "grad_norm": 3.6332430839538574, - "learning_rate": 3.2637599674676004e-05, - "loss": 0.6073, - "step": 117840 - }, - { - "epoch": 1.0418324227797522, - "grad_norm": 8.436199188232422, - "learning_rate": 3.263612628700413e-05, - "loss": 0.5925, - "step": 117850 - }, - { - "epoch": 1.0419208260400643, - "grad_norm": 2.1173672676086426, - "learning_rate": 3.263465289933226e-05, - "loss": 0.5255, - "step": 117860 - }, - { - "epoch": 1.0420092293003766, - "grad_norm": 1.4945423603057861, - "learning_rate": 3.263317951166039e-05, - "loss": 0.6486, - "step": 117870 - }, - { - "epoch": 1.0420976325606888, - "grad_norm": 2.0368943214416504, - "learning_rate": 3.263170612398852e-05, - "loss": 0.6807, - "step": 117880 - }, - { - "epoch": 1.0421860358210011, - "grad_norm": 1.9873511791229248, - "learning_rate": 3.263023273631665e-05, - "loss": 0.5655, - "step": 117890 - }, - { - "epoch": 1.0422744390813132, - "grad_norm": 1.3288109302520752, - "learning_rate": 3.2628759348644775e-05, - "loss": 0.5924, - "step": 117900 - }, - { - "epoch": 1.0423628423416256, - "grad_norm": 1.703797459602356, - "learning_rate": 3.262728596097291e-05, - "loss": 0.6244, - "step": 117910 - }, - { - "epoch": 1.0424512456019377, - "grad_norm": 2.420076847076416, - "learning_rate": 3.262581257330104e-05, - "loss": 0.6413, - "step": 117920 - }, - { - "epoch": 1.04253964886225, - "grad_norm": 2.431633472442627, - "learning_rate": 3.2624339185629166e-05, - "loss": 0.6053, - "step": 117930 - }, - { - "epoch": 1.0426280521225624, - "grad_norm": 2.1759305000305176, - "learning_rate": 3.2622865797957295e-05, - "loss": 0.5221, - "step": 117940 - }, - { - "epoch": 1.0427164553828745, - "grad_norm": 1.887358546257019, - "learning_rate": 3.262139241028543e-05, - "loss": 0.6447, - "step": 117950 - }, - { - "epoch": 1.0428048586431868, - "grad_norm": 8.109214782714844, - "learning_rate": 3.261991902261355e-05, - "loss": 0.6483, - "step": 117960 - }, - { - "epoch": 1.042893261903499, - "grad_norm": 5.8749213218688965, - "learning_rate": 3.2618445634941687e-05, - "loss": 0.5661, - "step": 117970 - }, - { - "epoch": 1.0429816651638113, - "grad_norm": 2.1259536743164062, - "learning_rate": 3.2616972247269815e-05, - "loss": 0.5928, - "step": 117980 - }, - { - "epoch": 1.0430700684241234, - "grad_norm": 4.279666900634766, - "learning_rate": 3.261549885959794e-05, - "loss": 0.4649, - "step": 117990 - }, - { - "epoch": 1.0431584716844358, - "grad_norm": 7.866892337799072, - "learning_rate": 3.261402547192607e-05, - "loss": 0.6927, - "step": 118000 - }, - { - "epoch": 1.0432468749447479, - "grad_norm": 2.551658868789673, - "learning_rate": 3.26125520842542e-05, - "loss": 0.6887, - "step": 118010 - }, - { - "epoch": 1.0433352782050602, - "grad_norm": 1.9606891870498657, - "learning_rate": 3.261107869658233e-05, - "loss": 0.7013, - "step": 118020 - }, - { - "epoch": 1.0434236814653723, - "grad_norm": 32.96314239501953, - "learning_rate": 3.2609605308910464e-05, - "loss": 0.5399, - "step": 118030 - }, - { - "epoch": 1.0435120847256847, - "grad_norm": 5.241535186767578, - "learning_rate": 3.2608131921238585e-05, - "loss": 0.6744, - "step": 118040 - }, - { - "epoch": 1.043600487985997, - "grad_norm": 3.9287118911743164, - "learning_rate": 3.260665853356672e-05, - "loss": 0.6243, - "step": 118050 - }, - { - "epoch": 1.0436888912463091, - "grad_norm": 1.7416187524795532, - "learning_rate": 3.260518514589485e-05, - "loss": 0.4971, - "step": 118060 - }, - { - "epoch": 1.0437772945066215, - "grad_norm": 13.711721420288086, - "learning_rate": 3.260371175822298e-05, - "loss": 0.6676, - "step": 118070 - }, - { - "epoch": 1.0438656977669336, - "grad_norm": 1.176175594329834, - "learning_rate": 3.2602238370551105e-05, - "loss": 0.5755, - "step": 118080 - }, - { - "epoch": 1.043954101027246, - "grad_norm": 34.83610916137695, - "learning_rate": 3.260076498287924e-05, - "loss": 0.6186, - "step": 118090 - }, - { - "epoch": 1.044042504287558, - "grad_norm": 0.8576605319976807, - "learning_rate": 3.259929159520736e-05, - "loss": 0.5831, - "step": 118100 - }, - { - "epoch": 1.0441309075478704, - "grad_norm": 2.002018451690674, - "learning_rate": 3.25978182075355e-05, - "loss": 0.5897, - "step": 118110 - }, - { - "epoch": 1.0442193108081825, - "grad_norm": 6.9745097160339355, - "learning_rate": 3.2596344819863625e-05, - "loss": 0.5911, - "step": 118120 - }, - { - "epoch": 1.0443077140684949, - "grad_norm": 1.8782075643539429, - "learning_rate": 3.2594871432191754e-05, - "loss": 0.7535, - "step": 118130 - }, - { - "epoch": 1.044396117328807, - "grad_norm": 1.2439783811569214, - "learning_rate": 3.259339804451988e-05, - "loss": 0.6057, - "step": 118140 - }, - { - "epoch": 1.0444845205891193, - "grad_norm": 1.0344895124435425, - "learning_rate": 3.259192465684801e-05, - "loss": 0.7656, - "step": 118150 - }, - { - "epoch": 1.0445729238494317, - "grad_norm": 1.9495149850845337, - "learning_rate": 3.259045126917614e-05, - "loss": 0.5793, - "step": 118160 - }, - { - "epoch": 1.0446613271097438, - "grad_norm": 1.5701905488967896, - "learning_rate": 3.2588977881504274e-05, - "loss": 0.7641, - "step": 118170 - }, - { - "epoch": 1.0447497303700561, - "grad_norm": 1.7587635517120361, - "learning_rate": 3.25875044938324e-05, - "loss": 0.5941, - "step": 118180 - }, - { - "epoch": 1.0448381336303683, - "grad_norm": 5.947789192199707, - "learning_rate": 3.258603110616053e-05, - "loss": 0.6691, - "step": 118190 - }, - { - "epoch": 1.0449265368906806, - "grad_norm": 0.7765215635299683, - "learning_rate": 3.258455771848866e-05, - "loss": 0.6448, - "step": 118200 - }, - { - "epoch": 1.0450149401509927, - "grad_norm": 8.86922836303711, - "learning_rate": 3.258308433081679e-05, - "loss": 0.7149, - "step": 118210 - }, - { - "epoch": 1.045103343411305, - "grad_norm": 1.8575059175491333, - "learning_rate": 3.2581610943144916e-05, - "loss": 0.5772, - "step": 118220 - }, - { - "epoch": 1.0451917466716172, - "grad_norm": 2.029527425765991, - "learning_rate": 3.2580137555473044e-05, - "loss": 0.6826, - "step": 118230 - }, - { - "epoch": 1.0452801499319295, - "grad_norm": 7.09782075881958, - "learning_rate": 3.257866416780118e-05, - "loss": 0.6824, - "step": 118240 - }, - { - "epoch": 1.0453685531922416, - "grad_norm": 2.7990503311157227, - "learning_rate": 3.257719078012931e-05, - "loss": 0.6052, - "step": 118250 - }, - { - "epoch": 1.045456956452554, - "grad_norm": 1.5899012088775635, - "learning_rate": 3.2575717392457436e-05, - "loss": 0.5407, - "step": 118260 - }, - { - "epoch": 1.045545359712866, - "grad_norm": 4.548243999481201, - "learning_rate": 3.2574244004785564e-05, - "loss": 0.697, - "step": 118270 - }, - { - "epoch": 1.0456337629731784, - "grad_norm": 1.0858268737792969, - "learning_rate": 3.257277061711369e-05, - "loss": 0.578, - "step": 118280 - }, - { - "epoch": 1.0457221662334908, - "grad_norm": 1.5233114957809448, - "learning_rate": 3.257129722944182e-05, - "loss": 0.6481, - "step": 118290 - }, - { - "epoch": 1.045810569493803, - "grad_norm": 1.6378499269485474, - "learning_rate": 3.2569823841769956e-05, - "loss": 0.6644, - "step": 118300 - }, - { - "epoch": 1.0458989727541153, - "grad_norm": 1.4661781787872314, - "learning_rate": 3.2568350454098085e-05, - "loss": 0.8157, - "step": 118310 - }, - { - "epoch": 1.0459873760144274, - "grad_norm": 4.555196285247803, - "learning_rate": 3.256687706642621e-05, - "loss": 0.5554, - "step": 118320 - }, - { - "epoch": 1.0460757792747397, - "grad_norm": 1.831338882446289, - "learning_rate": 3.256540367875434e-05, - "loss": 0.5105, - "step": 118330 - }, - { - "epoch": 1.0461641825350518, - "grad_norm": 7.606119632720947, - "learning_rate": 3.256393029108247e-05, - "loss": 0.6715, - "step": 118340 - }, - { - "epoch": 1.0462525857953642, - "grad_norm": 3.658600091934204, - "learning_rate": 3.25624569034106e-05, - "loss": 0.5396, - "step": 118350 - }, - { - "epoch": 1.0463409890556763, - "grad_norm": 5.823617458343506, - "learning_rate": 3.256098351573873e-05, - "loss": 0.5467, - "step": 118360 - }, - { - "epoch": 1.0464293923159886, - "grad_norm": 1.1644865274429321, - "learning_rate": 3.2559510128066855e-05, - "loss": 0.6025, - "step": 118370 - }, - { - "epoch": 1.0465177955763008, - "grad_norm": 2.5406291484832764, - "learning_rate": 3.255803674039499e-05, - "loss": 0.5164, - "step": 118380 - }, - { - "epoch": 1.046606198836613, - "grad_norm": 1.1089091300964355, - "learning_rate": 3.255656335272312e-05, - "loss": 0.5679, - "step": 118390 - }, - { - "epoch": 1.0466946020969254, - "grad_norm": 1.1352068185806274, - "learning_rate": 3.2555089965051246e-05, - "loss": 0.6858, - "step": 118400 - }, - { - "epoch": 1.0467830053572376, - "grad_norm": 1.3964446783065796, - "learning_rate": 3.2553616577379375e-05, - "loss": 0.6755, - "step": 118410 - }, - { - "epoch": 1.04687140861755, - "grad_norm": 3.4785985946655273, - "learning_rate": 3.255214318970751e-05, - "loss": 0.6976, - "step": 118420 - }, - { - "epoch": 1.046959811877862, - "grad_norm": 2.8392281532287598, - "learning_rate": 3.255066980203563e-05, - "loss": 0.5557, - "step": 118430 - }, - { - "epoch": 1.0470482151381744, - "grad_norm": 2.3943850994110107, - "learning_rate": 3.254919641436377e-05, - "loss": 0.6604, - "step": 118440 - }, - { - "epoch": 1.0471366183984865, - "grad_norm": 0.850866973400116, - "learning_rate": 3.2547723026691895e-05, - "loss": 0.5476, - "step": 118450 - }, - { - "epoch": 1.0472250216587988, - "grad_norm": 4.029863357543945, - "learning_rate": 3.2546249639020023e-05, - "loss": 0.6471, - "step": 118460 - }, - { - "epoch": 1.047313424919111, - "grad_norm": 4.031404495239258, - "learning_rate": 3.254477625134815e-05, - "loss": 0.7169, - "step": 118470 - }, - { - "epoch": 1.0474018281794233, - "grad_norm": 26.501449584960938, - "learning_rate": 3.254330286367628e-05, - "loss": 0.5073, - "step": 118480 - }, - { - "epoch": 1.0474902314397354, - "grad_norm": 1.9022653102874756, - "learning_rate": 3.254182947600441e-05, - "loss": 0.5951, - "step": 118490 - }, - { - "epoch": 1.0475786347000478, - "grad_norm": 7.83197546005249, - "learning_rate": 3.2540356088332544e-05, - "loss": 0.6585, - "step": 118500 - }, - { - "epoch": 1.0476670379603599, - "grad_norm": 6.007931709289551, - "learning_rate": 3.2538882700660665e-05, - "loss": 0.5451, - "step": 118510 - }, - { - "epoch": 1.0477554412206722, - "grad_norm": 1.2372618913650513, - "learning_rate": 3.25374093129888e-05, - "loss": 0.603, - "step": 118520 - }, - { - "epoch": 1.0478438444809846, - "grad_norm": 5.3750529289245605, - "learning_rate": 3.253593592531693e-05, - "loss": 0.6387, - "step": 118530 - }, - { - "epoch": 1.0479322477412967, - "grad_norm": 5.821578502655029, - "learning_rate": 3.253446253764506e-05, - "loss": 0.6934, - "step": 118540 - }, - { - "epoch": 1.048020651001609, - "grad_norm": 2.2839174270629883, - "learning_rate": 3.2532989149973185e-05, - "loss": 0.6178, - "step": 118550 - }, - { - "epoch": 1.0481090542619211, - "grad_norm": 1.5846675634384155, - "learning_rate": 3.253151576230132e-05, - "loss": 0.5161, - "step": 118560 - }, - { - "epoch": 1.0481974575222335, - "grad_norm": 1.9797673225402832, - "learning_rate": 3.253004237462944e-05, - "loss": 0.646, - "step": 118570 - }, - { - "epoch": 1.0482858607825456, - "grad_norm": 2.532197952270508, - "learning_rate": 3.252856898695758e-05, - "loss": 0.6122, - "step": 118580 - }, - { - "epoch": 1.048374264042858, - "grad_norm": 2.0440354347229004, - "learning_rate": 3.25270955992857e-05, - "loss": 0.5084, - "step": 118590 - }, - { - "epoch": 1.04846266730317, - "grad_norm": 8.088627815246582, - "learning_rate": 3.2525622211613834e-05, - "loss": 0.7105, - "step": 118600 - }, - { - "epoch": 1.0485510705634824, - "grad_norm": 2.5125439167022705, - "learning_rate": 3.252414882394196e-05, - "loss": 0.6422, - "step": 118610 - }, - { - "epoch": 1.0486394738237945, - "grad_norm": 1.638609766960144, - "learning_rate": 3.252267543627009e-05, - "loss": 0.495, - "step": 118620 - }, - { - "epoch": 1.0487278770841069, - "grad_norm": 3.144679307937622, - "learning_rate": 3.252120204859822e-05, - "loss": 0.6835, - "step": 118630 - }, - { - "epoch": 1.0488162803444192, - "grad_norm": 1.8833526372909546, - "learning_rate": 3.2519728660926354e-05, - "loss": 0.6067, - "step": 118640 - }, - { - "epoch": 1.0489046836047313, - "grad_norm": 2.845440626144409, - "learning_rate": 3.2518255273254476e-05, - "loss": 0.6727, - "step": 118650 - }, - { - "epoch": 1.0489930868650437, - "grad_norm": 2.4671027660369873, - "learning_rate": 3.251678188558261e-05, - "loss": 0.6061, - "step": 118660 - }, - { - "epoch": 1.0490814901253558, - "grad_norm": 10.824947357177734, - "learning_rate": 3.251530849791074e-05, - "loss": 0.6677, - "step": 118670 - }, - { - "epoch": 1.0491698933856681, - "grad_norm": 2.396451473236084, - "learning_rate": 3.251383511023887e-05, - "loss": 0.6101, - "step": 118680 - }, - { - "epoch": 1.0492582966459802, - "grad_norm": 2.3407211303710938, - "learning_rate": 3.2512361722566996e-05, - "loss": 0.747, - "step": 118690 - }, - { - "epoch": 1.0493466999062926, - "grad_norm": 3.300262928009033, - "learning_rate": 3.2510888334895124e-05, - "loss": 0.6422, - "step": 118700 - }, - { - "epoch": 1.0494351031666047, - "grad_norm": 4.343479633331299, - "learning_rate": 3.250941494722325e-05, - "loss": 0.7417, - "step": 118710 - }, - { - "epoch": 1.049523506426917, - "grad_norm": 4.603010654449463, - "learning_rate": 3.250794155955139e-05, - "loss": 0.5915, - "step": 118720 - }, - { - "epoch": 1.0496119096872292, - "grad_norm": 6.133446216583252, - "learning_rate": 3.250646817187951e-05, - "loss": 0.6562, - "step": 118730 - }, - { - "epoch": 1.0497003129475415, - "grad_norm": 2.617239236831665, - "learning_rate": 3.2504994784207644e-05, - "loss": 0.6916, - "step": 118740 - }, - { - "epoch": 1.0497887162078539, - "grad_norm": 2.3791656494140625, - "learning_rate": 3.250352139653577e-05, - "loss": 0.4972, - "step": 118750 - }, - { - "epoch": 1.049877119468166, - "grad_norm": 1.3188098669052124, - "learning_rate": 3.25020480088639e-05, - "loss": 0.6578, - "step": 118760 - }, - { - "epoch": 1.0499655227284783, - "grad_norm": 0.976134181022644, - "learning_rate": 3.250057462119203e-05, - "loss": 0.7267, - "step": 118770 - }, - { - "epoch": 1.0500539259887904, - "grad_norm": 1.7477500438690186, - "learning_rate": 3.2499101233520165e-05, - "loss": 0.68, - "step": 118780 - }, - { - "epoch": 1.0501423292491028, - "grad_norm": 6.36392879486084, - "learning_rate": 3.2497627845848286e-05, - "loss": 0.6517, - "step": 118790 - }, - { - "epoch": 1.050230732509415, - "grad_norm": 1.692762017250061, - "learning_rate": 3.249615445817642e-05, - "loss": 0.6802, - "step": 118800 - }, - { - "epoch": 1.0503191357697272, - "grad_norm": 10.650903701782227, - "learning_rate": 3.249468107050454e-05, - "loss": 0.5884, - "step": 118810 - }, - { - "epoch": 1.0504075390300394, - "grad_norm": 1.98170006275177, - "learning_rate": 3.249320768283268e-05, - "loss": 0.6197, - "step": 118820 - }, - { - "epoch": 1.0504959422903517, - "grad_norm": 2.243741273880005, - "learning_rate": 3.2491734295160806e-05, - "loss": 0.4907, - "step": 118830 - }, - { - "epoch": 1.0505843455506638, - "grad_norm": 1.4422988891601562, - "learning_rate": 3.2490260907488935e-05, - "loss": 0.657, - "step": 118840 - }, - { - "epoch": 1.0506727488109762, - "grad_norm": 5.877725601196289, - "learning_rate": 3.248878751981706e-05, - "loss": 0.7138, - "step": 118850 - }, - { - "epoch": 1.0507611520712883, - "grad_norm": 2.1584677696228027, - "learning_rate": 3.24873141321452e-05, - "loss": 0.6772, - "step": 118860 - }, - { - "epoch": 1.0508495553316006, - "grad_norm": 3.422776699066162, - "learning_rate": 3.248584074447332e-05, - "loss": 0.6891, - "step": 118870 - }, - { - "epoch": 1.050937958591913, - "grad_norm": 1.1917446851730347, - "learning_rate": 3.2484367356801455e-05, - "loss": 0.708, - "step": 118880 - }, - { - "epoch": 1.051026361852225, - "grad_norm": 1.870484709739685, - "learning_rate": 3.248289396912958e-05, - "loss": 0.7486, - "step": 118890 - }, - { - "epoch": 1.0511147651125374, - "grad_norm": 2.3740108013153076, - "learning_rate": 3.248142058145771e-05, - "loss": 0.7176, - "step": 118900 - }, - { - "epoch": 1.0512031683728496, - "grad_norm": 1.7380127906799316, - "learning_rate": 3.247994719378584e-05, - "loss": 0.6192, - "step": 118910 - }, - { - "epoch": 1.051291571633162, - "grad_norm": 1.7060693502426147, - "learning_rate": 3.2478473806113975e-05, - "loss": 0.5793, - "step": 118920 - }, - { - "epoch": 1.051379974893474, - "grad_norm": 6.669757843017578, - "learning_rate": 3.24770004184421e-05, - "loss": 0.6941, - "step": 118930 - }, - { - "epoch": 1.0514683781537864, - "grad_norm": 1.8193747997283936, - "learning_rate": 3.247552703077023e-05, - "loss": 0.7079, - "step": 118940 - }, - { - "epoch": 1.0515567814140985, - "grad_norm": 2.433159112930298, - "learning_rate": 3.2474053643098353e-05, - "loss": 0.6772, - "step": 118950 - }, - { - "epoch": 1.0516451846744108, - "grad_norm": 2.7907907962799072, - "learning_rate": 3.247258025542649e-05, - "loss": 0.7304, - "step": 118960 - }, - { - "epoch": 1.051733587934723, - "grad_norm": 4.268134117126465, - "learning_rate": 3.247110686775462e-05, - "loss": 0.7282, - "step": 118970 - }, - { - "epoch": 1.0518219911950353, - "grad_norm": 1.9655283689498901, - "learning_rate": 3.2469633480082745e-05, - "loss": 0.5369, - "step": 118980 - }, - { - "epoch": 1.0519103944553476, - "grad_norm": 11.410774230957031, - "learning_rate": 3.2468160092410874e-05, - "loss": 0.5548, - "step": 118990 - }, - { - "epoch": 1.0519987977156597, - "grad_norm": 2.279615879058838, - "learning_rate": 3.246668670473901e-05, - "loss": 0.6723, - "step": 119000 - }, - { - "epoch": 1.052087200975972, - "grad_norm": 1.2958675622940063, - "learning_rate": 3.246521331706713e-05, - "loss": 0.5788, - "step": 119010 - }, - { - "epoch": 1.0521756042362842, - "grad_norm": 2.697662591934204, - "learning_rate": 3.2463739929395265e-05, - "loss": 0.7265, - "step": 119020 - }, - { - "epoch": 1.0522640074965965, - "grad_norm": 1.0697963237762451, - "learning_rate": 3.2462266541723394e-05, - "loss": 0.5311, - "step": 119030 - }, - { - "epoch": 1.0523524107569087, - "grad_norm": 5.604464054107666, - "learning_rate": 3.246079315405152e-05, - "loss": 0.5922, - "step": 119040 - }, - { - "epoch": 1.052440814017221, - "grad_norm": 1.7310692071914673, - "learning_rate": 3.245931976637965e-05, - "loss": 0.5441, - "step": 119050 - }, - { - "epoch": 1.0525292172775331, - "grad_norm": 6.643568992614746, - "learning_rate": 3.245784637870778e-05, - "loss": 0.5552, - "step": 119060 - }, - { - "epoch": 1.0526176205378455, - "grad_norm": 1.0637174844741821, - "learning_rate": 3.245637299103591e-05, - "loss": 0.6838, - "step": 119070 - }, - { - "epoch": 1.0527060237981576, - "grad_norm": 13.687317848205566, - "learning_rate": 3.245489960336404e-05, - "loss": 0.5748, - "step": 119080 - }, - { - "epoch": 1.05279442705847, - "grad_norm": 3.949272871017456, - "learning_rate": 3.245342621569217e-05, - "loss": 0.6729, - "step": 119090 - }, - { - "epoch": 1.052882830318782, - "grad_norm": 3.789527654647827, - "learning_rate": 3.24519528280203e-05, - "loss": 0.6706, - "step": 119100 - }, - { - "epoch": 1.0529712335790944, - "grad_norm": 11.215285301208496, - "learning_rate": 3.245047944034843e-05, - "loss": 0.7216, - "step": 119110 - }, - { - "epoch": 1.0530596368394067, - "grad_norm": 3.875169038772583, - "learning_rate": 3.2449006052676556e-05, - "loss": 0.7692, - "step": 119120 - }, - { - "epoch": 1.0531480400997189, - "grad_norm": 1.3836971521377563, - "learning_rate": 3.2447532665004684e-05, - "loss": 0.5809, - "step": 119130 - }, - { - "epoch": 1.0532364433600312, - "grad_norm": 6.706811428070068, - "learning_rate": 3.244605927733282e-05, - "loss": 0.6049, - "step": 119140 - }, - { - "epoch": 1.0533248466203433, - "grad_norm": 13.888518333435059, - "learning_rate": 3.244458588966095e-05, - "loss": 0.7284, - "step": 119150 - }, - { - "epoch": 1.0534132498806557, - "grad_norm": 2.408536911010742, - "learning_rate": 3.2443112501989076e-05, - "loss": 0.5536, - "step": 119160 - }, - { - "epoch": 1.0535016531409678, - "grad_norm": 1.611156940460205, - "learning_rate": 3.2441639114317204e-05, - "loss": 0.6307, - "step": 119170 - }, - { - "epoch": 1.0535900564012801, - "grad_norm": 4.852395057678223, - "learning_rate": 3.244016572664533e-05, - "loss": 0.612, - "step": 119180 - }, - { - "epoch": 1.0536784596615922, - "grad_norm": 2.170468807220459, - "learning_rate": 3.243869233897346e-05, - "loss": 0.6796, - "step": 119190 - }, - { - "epoch": 1.0537668629219046, - "grad_norm": 1.043799877166748, - "learning_rate": 3.243721895130159e-05, - "loss": 0.6356, - "step": 119200 - }, - { - "epoch": 1.0538552661822167, - "grad_norm": 2.0495386123657227, - "learning_rate": 3.2435745563629724e-05, - "loss": 0.5904, - "step": 119210 - }, - { - "epoch": 1.053943669442529, - "grad_norm": 1.8927379846572876, - "learning_rate": 3.243427217595785e-05, - "loss": 0.666, - "step": 119220 - }, - { - "epoch": 1.0540320727028414, - "grad_norm": 0.8466701507568359, - "learning_rate": 3.243279878828598e-05, - "loss": 0.6227, - "step": 119230 - }, - { - "epoch": 1.0541204759631535, - "grad_norm": 2.2930867671966553, - "learning_rate": 3.243132540061411e-05, - "loss": 0.5204, - "step": 119240 - }, - { - "epoch": 1.0542088792234658, - "grad_norm": 1.7285113334655762, - "learning_rate": 3.242985201294224e-05, - "loss": 0.5859, - "step": 119250 - }, - { - "epoch": 1.054297282483778, - "grad_norm": 2.9076006412506104, - "learning_rate": 3.2428378625270366e-05, - "loss": 0.5672, - "step": 119260 - }, - { - "epoch": 1.0543856857440903, - "grad_norm": 0.6604291200637817, - "learning_rate": 3.24269052375985e-05, - "loss": 0.6291, - "step": 119270 - }, - { - "epoch": 1.0544740890044024, - "grad_norm": 1.632657766342163, - "learning_rate": 3.242543184992662e-05, - "loss": 0.7196, - "step": 119280 - }, - { - "epoch": 1.0545624922647148, - "grad_norm": 1.1319701671600342, - "learning_rate": 3.242395846225476e-05, - "loss": 0.5293, - "step": 119290 - }, - { - "epoch": 1.0546508955250269, - "grad_norm": 15.328736305236816, - "learning_rate": 3.2422485074582886e-05, - "loss": 0.6006, - "step": 119300 - }, - { - "epoch": 1.0547392987853392, - "grad_norm": 3.335042953491211, - "learning_rate": 3.2421011686911015e-05, - "loss": 0.5151, - "step": 119310 - }, - { - "epoch": 1.0548277020456513, - "grad_norm": 7.990843772888184, - "learning_rate": 3.241953829923914e-05, - "loss": 0.6766, - "step": 119320 - }, - { - "epoch": 1.0549161053059637, - "grad_norm": 1.868437647819519, - "learning_rate": 3.241806491156728e-05, - "loss": 0.6583, - "step": 119330 - }, - { - "epoch": 1.055004508566276, - "grad_norm": 2.881113052368164, - "learning_rate": 3.24165915238954e-05, - "loss": 0.6838, - "step": 119340 - }, - { - "epoch": 1.0550929118265882, - "grad_norm": 1.1695398092269897, - "learning_rate": 3.2415118136223535e-05, - "loss": 0.6705, - "step": 119350 - }, - { - "epoch": 1.0551813150869005, - "grad_norm": 5.177979469299316, - "learning_rate": 3.241364474855166e-05, - "loss": 0.6029, - "step": 119360 - }, - { - "epoch": 1.0552697183472126, - "grad_norm": 1.2371217012405396, - "learning_rate": 3.241217136087979e-05, - "loss": 0.5284, - "step": 119370 - }, - { - "epoch": 1.055358121607525, - "grad_norm": 1.984904408454895, - "learning_rate": 3.241069797320792e-05, - "loss": 0.5742, - "step": 119380 - }, - { - "epoch": 1.055446524867837, - "grad_norm": 3.9826338291168213, - "learning_rate": 3.2409224585536055e-05, - "loss": 0.5881, - "step": 119390 - }, - { - "epoch": 1.0555349281281494, - "grad_norm": 3.7008163928985596, - "learning_rate": 3.240775119786418e-05, - "loss": 0.5789, - "step": 119400 - }, - { - "epoch": 1.0556233313884615, - "grad_norm": 2.044239044189453, - "learning_rate": 3.240627781019231e-05, - "loss": 0.7048, - "step": 119410 - }, - { - "epoch": 1.0557117346487739, - "grad_norm": 1.688833475112915, - "learning_rate": 3.2404804422520433e-05, - "loss": 0.5084, - "step": 119420 - }, - { - "epoch": 1.055800137909086, - "grad_norm": 3.701371192932129, - "learning_rate": 3.240333103484857e-05, - "loss": 0.728, - "step": 119430 - }, - { - "epoch": 1.0558885411693983, - "grad_norm": 3.2155213356018066, - "learning_rate": 3.24018576471767e-05, - "loss": 0.5807, - "step": 119440 - }, - { - "epoch": 1.0559769444297105, - "grad_norm": 1.2120938301086426, - "learning_rate": 3.2400384259504825e-05, - "loss": 0.6785, - "step": 119450 - }, - { - "epoch": 1.0560653476900228, - "grad_norm": 1.965158224105835, - "learning_rate": 3.2398910871832954e-05, - "loss": 0.6014, - "step": 119460 - }, - { - "epoch": 1.0561537509503351, - "grad_norm": 2.8948214054107666, - "learning_rate": 3.239743748416109e-05, - "loss": 0.7163, - "step": 119470 - }, - { - "epoch": 1.0562421542106473, - "grad_norm": 5.8671464920043945, - "learning_rate": 3.239596409648921e-05, - "loss": 0.5125, - "step": 119480 - }, - { - "epoch": 1.0563305574709596, - "grad_norm": 0.9219271540641785, - "learning_rate": 3.2394490708817345e-05, - "loss": 0.5515, - "step": 119490 - }, - { - "epoch": 1.0564189607312717, - "grad_norm": 3.2974612712860107, - "learning_rate": 3.2393017321145474e-05, - "loss": 0.6556, - "step": 119500 - }, - { - "epoch": 1.056507363991584, - "grad_norm": 11.104557991027832, - "learning_rate": 3.23915439334736e-05, - "loss": 0.5809, - "step": 119510 - }, - { - "epoch": 1.0565957672518962, - "grad_norm": 2.6774652004241943, - "learning_rate": 3.239007054580173e-05, - "loss": 0.548, - "step": 119520 - }, - { - "epoch": 1.0566841705122085, - "grad_norm": 2.163102149963379, - "learning_rate": 3.238859715812986e-05, - "loss": 0.5961, - "step": 119530 - }, - { - "epoch": 1.0567725737725207, - "grad_norm": 1.8730876445770264, - "learning_rate": 3.238712377045799e-05, - "loss": 0.5904, - "step": 119540 - }, - { - "epoch": 1.056860977032833, - "grad_norm": 6.378261089324951, - "learning_rate": 3.238565038278612e-05, - "loss": 0.6158, - "step": 119550 - }, - { - "epoch": 1.0569493802931451, - "grad_norm": 5.50390100479126, - "learning_rate": 3.2384176995114244e-05, - "loss": 0.5649, - "step": 119560 - }, - { - "epoch": 1.0570377835534575, - "grad_norm": 9.187122344970703, - "learning_rate": 3.238270360744238e-05, - "loss": 0.7454, - "step": 119570 - }, - { - "epoch": 1.0571261868137698, - "grad_norm": 1.2036949396133423, - "learning_rate": 3.238123021977051e-05, - "loss": 0.6886, - "step": 119580 - }, - { - "epoch": 1.057214590074082, - "grad_norm": 3.5661401748657227, - "learning_rate": 3.2379756832098636e-05, - "loss": 0.6194, - "step": 119590 - }, - { - "epoch": 1.0573029933343943, - "grad_norm": 3.2739648818969727, - "learning_rate": 3.2378283444426764e-05, - "loss": 0.6981, - "step": 119600 - }, - { - "epoch": 1.0573913965947064, - "grad_norm": 1.8871715068817139, - "learning_rate": 3.23768100567549e-05, - "loss": 0.6083, - "step": 119610 - }, - { - "epoch": 1.0574797998550187, - "grad_norm": 2.985586643218994, - "learning_rate": 3.237533666908302e-05, - "loss": 0.5318, - "step": 119620 - }, - { - "epoch": 1.0575682031153308, - "grad_norm": 0.9269682765007019, - "learning_rate": 3.2373863281411156e-05, - "loss": 0.6014, - "step": 119630 - }, - { - "epoch": 1.0576566063756432, - "grad_norm": 5.7977614402771, - "learning_rate": 3.237238989373928e-05, - "loss": 0.4181, - "step": 119640 - }, - { - "epoch": 1.0577450096359553, - "grad_norm": 1.8269349336624146, - "learning_rate": 3.237091650606741e-05, - "loss": 0.5756, - "step": 119650 - }, - { - "epoch": 1.0578334128962676, - "grad_norm": 1.9745653867721558, - "learning_rate": 3.236944311839554e-05, - "loss": 0.4607, - "step": 119660 - }, - { - "epoch": 1.0579218161565798, - "grad_norm": 2.573300838470459, - "learning_rate": 3.236796973072367e-05, - "loss": 0.5911, - "step": 119670 - }, - { - "epoch": 1.058010219416892, - "grad_norm": 2.9967710971832275, - "learning_rate": 3.23664963430518e-05, - "loss": 0.5213, - "step": 119680 - }, - { - "epoch": 1.0580986226772042, - "grad_norm": 4.2053351402282715, - "learning_rate": 3.236502295537993e-05, - "loss": 0.7029, - "step": 119690 - }, - { - "epoch": 1.0581870259375166, - "grad_norm": 3.492554187774658, - "learning_rate": 3.2363549567708054e-05, - "loss": 0.4927, - "step": 119700 - }, - { - "epoch": 1.058275429197829, - "grad_norm": 1.8730839490890503, - "learning_rate": 3.236207618003619e-05, - "loss": 0.6672, - "step": 119710 - }, - { - "epoch": 1.058363832458141, - "grad_norm": 3.348025321960449, - "learning_rate": 3.236060279236432e-05, - "loss": 0.6587, - "step": 119720 - }, - { - "epoch": 1.0584522357184534, - "grad_norm": 4.149188995361328, - "learning_rate": 3.2359129404692446e-05, - "loss": 0.6755, - "step": 119730 - }, - { - "epoch": 1.0585406389787655, - "grad_norm": 2.0982747077941895, - "learning_rate": 3.2357656017020575e-05, - "loss": 0.6226, - "step": 119740 - }, - { - "epoch": 1.0586290422390778, - "grad_norm": 4.101964473724365, - "learning_rate": 3.235618262934871e-05, - "loss": 0.5546, - "step": 119750 - }, - { - "epoch": 1.05871744549939, - "grad_norm": 8.209115982055664, - "learning_rate": 3.235470924167683e-05, - "loss": 0.5523, - "step": 119760 - }, - { - "epoch": 1.0588058487597023, - "grad_norm": 2.536583185195923, - "learning_rate": 3.2353235854004967e-05, - "loss": 0.7637, - "step": 119770 - }, - { - "epoch": 1.0588942520200144, - "grad_norm": 7.836760997772217, - "learning_rate": 3.235176246633309e-05, - "loss": 0.5865, - "step": 119780 - }, - { - "epoch": 1.0589826552803268, - "grad_norm": 3.317887783050537, - "learning_rate": 3.235028907866122e-05, - "loss": 0.6094, - "step": 119790 - }, - { - "epoch": 1.0590710585406389, - "grad_norm": 4.125409126281738, - "learning_rate": 3.234881569098935e-05, - "loss": 0.7318, - "step": 119800 - }, - { - "epoch": 1.0591594618009512, - "grad_norm": 2.881061553955078, - "learning_rate": 3.234734230331748e-05, - "loss": 0.6077, - "step": 119810 - }, - { - "epoch": 1.0592478650612636, - "grad_norm": 4.462584018707275, - "learning_rate": 3.234586891564561e-05, - "loss": 0.6128, - "step": 119820 - }, - { - "epoch": 1.0593362683215757, - "grad_norm": 8.47276782989502, - "learning_rate": 3.2344395527973743e-05, - "loss": 0.5648, - "step": 119830 - }, - { - "epoch": 1.059424671581888, - "grad_norm": 9.020562171936035, - "learning_rate": 3.2342922140301865e-05, - "loss": 0.711, - "step": 119840 - }, - { - "epoch": 1.0595130748422001, - "grad_norm": 1.9493681192398071, - "learning_rate": 3.234144875263e-05, - "loss": 0.6099, - "step": 119850 - }, - { - "epoch": 1.0596014781025125, - "grad_norm": 4.458160877227783, - "learning_rate": 3.233997536495813e-05, - "loss": 0.6741, - "step": 119860 - }, - { - "epoch": 1.0596898813628246, - "grad_norm": 1.1083298921585083, - "learning_rate": 3.233850197728626e-05, - "loss": 0.5078, - "step": 119870 - }, - { - "epoch": 1.059778284623137, - "grad_norm": 6.4721198081970215, - "learning_rate": 3.2337028589614385e-05, - "loss": 0.7002, - "step": 119880 - }, - { - "epoch": 1.059866687883449, - "grad_norm": 1.5790786743164062, - "learning_rate": 3.2335555201942514e-05, - "loss": 0.6224, - "step": 119890 - }, - { - "epoch": 1.0599550911437614, - "grad_norm": 6.935812950134277, - "learning_rate": 3.233408181427064e-05, - "loss": 0.7024, - "step": 119900 - }, - { - "epoch": 1.0600434944040735, - "grad_norm": 4.4745774269104, - "learning_rate": 3.233260842659878e-05, - "loss": 0.6105, - "step": 119910 - }, - { - "epoch": 1.0601318976643859, - "grad_norm": 3.367515802383423, - "learning_rate": 3.23311350389269e-05, - "loss": 0.6949, - "step": 119920 - }, - { - "epoch": 1.0602203009246982, - "grad_norm": 4.029800891876221, - "learning_rate": 3.2329661651255034e-05, - "loss": 0.7179, - "step": 119930 - }, - { - "epoch": 1.0603087041850103, - "grad_norm": 0.9912646412849426, - "learning_rate": 3.232818826358316e-05, - "loss": 0.5529, - "step": 119940 - }, - { - "epoch": 1.0603971074453227, - "grad_norm": 1.3798308372497559, - "learning_rate": 3.232671487591129e-05, - "loss": 0.5124, - "step": 119950 - }, - { - "epoch": 1.0604855107056348, - "grad_norm": 4.9029107093811035, - "learning_rate": 3.232524148823942e-05, - "loss": 0.6106, - "step": 119960 - }, - { - "epoch": 1.0605739139659471, - "grad_norm": 4.14245080947876, - "learning_rate": 3.2323768100567554e-05, - "loss": 0.7063, - "step": 119970 - }, - { - "epoch": 1.0606623172262593, - "grad_norm": 5.5595383644104, - "learning_rate": 3.2322294712895675e-05, - "loss": 0.5531, - "step": 119980 - }, - { - "epoch": 1.0607507204865716, - "grad_norm": 20.607101440429688, - "learning_rate": 3.232082132522381e-05, - "loss": 0.5401, - "step": 119990 - }, - { - "epoch": 1.0608391237468837, - "grad_norm": 2.56923770904541, - "learning_rate": 3.231934793755194e-05, - "loss": 0.6642, - "step": 120000 - }, - { - "epoch": 1.060927527007196, - "grad_norm": 5.361979961395264, - "learning_rate": 3.231787454988007e-05, - "loss": 0.681, - "step": 120010 - }, - { - "epoch": 1.0610159302675082, - "grad_norm": 1.8949682712554932, - "learning_rate": 3.2316401162208196e-05, - "loss": 0.6631, - "step": 120020 - }, - { - "epoch": 1.0611043335278205, - "grad_norm": 3.042494297027588, - "learning_rate": 3.2314927774536324e-05, - "loss": 0.6607, - "step": 120030 - }, - { - "epoch": 1.0611927367881326, - "grad_norm": 4.636878967285156, - "learning_rate": 3.231345438686445e-05, - "loss": 0.5422, - "step": 120040 - }, - { - "epoch": 1.061281140048445, - "grad_norm": 1.388635516166687, - "learning_rate": 3.231198099919259e-05, - "loss": 0.6404, - "step": 120050 - }, - { - "epoch": 1.0613695433087573, - "grad_norm": 1.4575765132904053, - "learning_rate": 3.2310507611520716e-05, - "loss": 0.6218, - "step": 120060 - }, - { - "epoch": 1.0614579465690694, - "grad_norm": 1.6528077125549316, - "learning_rate": 3.2309034223848844e-05, - "loss": 0.6597, - "step": 120070 - }, - { - "epoch": 1.0615463498293818, - "grad_norm": 3.4966204166412354, - "learning_rate": 3.230756083617697e-05, - "loss": 0.6115, - "step": 120080 - }, - { - "epoch": 1.061634753089694, - "grad_norm": 5.340075492858887, - "learning_rate": 3.23060874485051e-05, - "loss": 0.6453, - "step": 120090 - }, - { - "epoch": 1.0617231563500062, - "grad_norm": 2.160996198654175, - "learning_rate": 3.230461406083323e-05, - "loss": 0.5929, - "step": 120100 - }, - { - "epoch": 1.0618115596103184, - "grad_norm": 5.821319103240967, - "learning_rate": 3.230314067316136e-05, - "loss": 0.5311, - "step": 120110 - }, - { - "epoch": 1.0618999628706307, - "grad_norm": 1.443834662437439, - "learning_rate": 3.230166728548949e-05, - "loss": 0.6031, - "step": 120120 - }, - { - "epoch": 1.0619883661309428, - "grad_norm": 7.692910194396973, - "learning_rate": 3.230019389781762e-05, - "loss": 0.6992, - "step": 120130 - }, - { - "epoch": 1.0620767693912552, - "grad_norm": 1.5878018140792847, - "learning_rate": 3.229872051014575e-05, - "loss": 0.6119, - "step": 120140 - }, - { - "epoch": 1.0621651726515673, - "grad_norm": 23.90019416809082, - "learning_rate": 3.229724712247388e-05, - "loss": 0.5547, - "step": 120150 - }, - { - "epoch": 1.0622535759118796, - "grad_norm": 1.5449013710021973, - "learning_rate": 3.2295773734802006e-05, - "loss": 0.6846, - "step": 120160 - }, - { - "epoch": 1.062341979172192, - "grad_norm": 3.1172685623168945, - "learning_rate": 3.2294300347130135e-05, - "loss": 0.7436, - "step": 120170 - }, - { - "epoch": 1.062430382432504, - "grad_norm": 2.0229194164276123, - "learning_rate": 3.229282695945827e-05, - "loss": 0.5896, - "step": 120180 - }, - { - "epoch": 1.0625187856928164, - "grad_norm": 4.894861221313477, - "learning_rate": 3.22913535717864e-05, - "loss": 0.7207, - "step": 120190 - }, - { - "epoch": 1.0626071889531286, - "grad_norm": 2.4821605682373047, - "learning_rate": 3.2289880184114526e-05, - "loss": 0.6091, - "step": 120200 - }, - { - "epoch": 1.062695592213441, - "grad_norm": 3.251113176345825, - "learning_rate": 3.2288406796442655e-05, - "loss": 0.5585, - "step": 120210 - }, - { - "epoch": 1.062783995473753, - "grad_norm": 32.71751403808594, - "learning_rate": 3.228693340877078e-05, - "loss": 0.5548, - "step": 120220 - }, - { - "epoch": 1.0628723987340654, - "grad_norm": 1.7661948204040527, - "learning_rate": 3.228546002109891e-05, - "loss": 0.6298, - "step": 120230 - }, - { - "epoch": 1.0629608019943775, - "grad_norm": 4.406103610992432, - "learning_rate": 3.2283986633427047e-05, - "loss": 0.7948, - "step": 120240 - }, - { - "epoch": 1.0630492052546898, - "grad_norm": 5.395301818847656, - "learning_rate": 3.228251324575517e-05, - "loss": 0.6325, - "step": 120250 - }, - { - "epoch": 1.063137608515002, - "grad_norm": 5.040433883666992, - "learning_rate": 3.22810398580833e-05, - "loss": 0.6932, - "step": 120260 - }, - { - "epoch": 1.0632260117753143, - "grad_norm": 6.978147029876709, - "learning_rate": 3.227956647041143e-05, - "loss": 0.5559, - "step": 120270 - }, - { - "epoch": 1.0633144150356264, - "grad_norm": 2.692667245864868, - "learning_rate": 3.227809308273956e-05, - "loss": 0.6051, - "step": 120280 - }, - { - "epoch": 1.0634028182959387, - "grad_norm": 2.461068630218506, - "learning_rate": 3.227661969506769e-05, - "loss": 0.6555, - "step": 120290 - }, - { - "epoch": 1.063491221556251, - "grad_norm": 1.7914044857025146, - "learning_rate": 3.2275146307395823e-05, - "loss": 0.5954, - "step": 120300 - }, - { - "epoch": 1.0635796248165632, - "grad_norm": 5.35980749130249, - "learning_rate": 3.2273672919723945e-05, - "loss": 0.5973, - "step": 120310 - }, - { - "epoch": 1.0636680280768755, - "grad_norm": 0.904769241809845, - "learning_rate": 3.227219953205208e-05, - "loss": 0.6075, - "step": 120320 - }, - { - "epoch": 1.0637564313371877, - "grad_norm": 3.791430950164795, - "learning_rate": 3.227072614438021e-05, - "loss": 0.43, - "step": 120330 - }, - { - "epoch": 1.0638448345975, - "grad_norm": 2.046313762664795, - "learning_rate": 3.226925275670834e-05, - "loss": 0.6327, - "step": 120340 - }, - { - "epoch": 1.0639332378578121, - "grad_norm": 1.3397361040115356, - "learning_rate": 3.2267779369036465e-05, - "loss": 0.7551, - "step": 120350 - }, - { - "epoch": 1.0640216411181245, - "grad_norm": 2.307652711868286, - "learning_rate": 3.2266305981364594e-05, - "loss": 0.6605, - "step": 120360 - }, - { - "epoch": 1.0641100443784366, - "grad_norm": 1.9020657539367676, - "learning_rate": 3.226483259369272e-05, - "loss": 0.6767, - "step": 120370 - }, - { - "epoch": 1.064198447638749, - "grad_norm": 4.0863776206970215, - "learning_rate": 3.226335920602086e-05, - "loss": 0.6498, - "step": 120380 - }, - { - "epoch": 1.064286850899061, - "grad_norm": 2.576895236968994, - "learning_rate": 3.226188581834898e-05, - "loss": 0.5399, - "step": 120390 - }, - { - "epoch": 1.0643752541593734, - "grad_norm": 5.870359897613525, - "learning_rate": 3.2260412430677114e-05, - "loss": 0.667, - "step": 120400 - }, - { - "epoch": 1.0644636574196857, - "grad_norm": 2.762570381164551, - "learning_rate": 3.225893904300524e-05, - "loss": 0.6036, - "step": 120410 - }, - { - "epoch": 1.0645520606799979, - "grad_norm": 4.400736331939697, - "learning_rate": 3.225746565533337e-05, - "loss": 0.7545, - "step": 120420 - }, - { - "epoch": 1.0646404639403102, - "grad_norm": 1.3446694612503052, - "learning_rate": 3.22559922676615e-05, - "loss": 0.6572, - "step": 120430 - }, - { - "epoch": 1.0647288672006223, - "grad_norm": 1.4495806694030762, - "learning_rate": 3.2254518879989634e-05, - "loss": 0.6116, - "step": 120440 - }, - { - "epoch": 1.0648172704609347, - "grad_norm": 5.972079753875732, - "learning_rate": 3.2253045492317756e-05, - "loss": 0.565, - "step": 120450 - }, - { - "epoch": 1.0649056737212468, - "grad_norm": 2.1598987579345703, - "learning_rate": 3.225157210464589e-05, - "loss": 0.766, - "step": 120460 - }, - { - "epoch": 1.0649940769815591, - "grad_norm": 1.5302361249923706, - "learning_rate": 3.225009871697401e-05, - "loss": 0.5278, - "step": 120470 - }, - { - "epoch": 1.0650824802418712, - "grad_norm": 3.2372963428497314, - "learning_rate": 3.224862532930215e-05, - "loss": 0.5398, - "step": 120480 - }, - { - "epoch": 1.0651708835021836, - "grad_norm": 7.0759100914001465, - "learning_rate": 3.2247151941630276e-05, - "loss": 0.5889, - "step": 120490 - }, - { - "epoch": 1.0652592867624957, - "grad_norm": 1.8381094932556152, - "learning_rate": 3.2245678553958404e-05, - "loss": 0.5886, - "step": 120500 - }, - { - "epoch": 1.065347690022808, - "grad_norm": 1.341376543045044, - "learning_rate": 3.224420516628653e-05, - "loss": 0.6705, - "step": 120510 - }, - { - "epoch": 1.0654360932831204, - "grad_norm": 12.450121879577637, - "learning_rate": 3.224273177861467e-05, - "loss": 0.708, - "step": 120520 - }, - { - "epoch": 1.0655244965434325, - "grad_norm": 4.1199212074279785, - "learning_rate": 3.224125839094279e-05, - "loss": 0.6721, - "step": 120530 - }, - { - "epoch": 1.0656128998037449, - "grad_norm": 4.740642070770264, - "learning_rate": 3.2239785003270924e-05, - "loss": 0.5963, - "step": 120540 - }, - { - "epoch": 1.065701303064057, - "grad_norm": 5.144943714141846, - "learning_rate": 3.223831161559905e-05, - "loss": 0.662, - "step": 120550 - }, - { - "epoch": 1.0657897063243693, - "grad_norm": 1.3962523937225342, - "learning_rate": 3.223683822792718e-05, - "loss": 0.6482, - "step": 120560 - }, - { - "epoch": 1.0658781095846814, - "grad_norm": 2.8582942485809326, - "learning_rate": 3.223536484025531e-05, - "loss": 0.4729, - "step": 120570 - }, - { - "epoch": 1.0659665128449938, - "grad_norm": 4.132655620574951, - "learning_rate": 3.223389145258344e-05, - "loss": 0.5207, - "step": 120580 - }, - { - "epoch": 1.066054916105306, - "grad_norm": 1.2919360399246216, - "learning_rate": 3.2232418064911566e-05, - "loss": 0.5604, - "step": 120590 - }, - { - "epoch": 1.0661433193656182, - "grad_norm": 2.8105602264404297, - "learning_rate": 3.22309446772397e-05, - "loss": 0.4947, - "step": 120600 - }, - { - "epoch": 1.0662317226259304, - "grad_norm": 7.603043079376221, - "learning_rate": 3.222947128956782e-05, - "loss": 0.6662, - "step": 120610 - }, - { - "epoch": 1.0663201258862427, - "grad_norm": 10.433573722839355, - "learning_rate": 3.222799790189596e-05, - "loss": 0.5635, - "step": 120620 - }, - { - "epoch": 1.066408529146555, - "grad_norm": 2.4541211128234863, - "learning_rate": 3.2226524514224086e-05, - "loss": 0.7041, - "step": 120630 - }, - { - "epoch": 1.0664969324068672, - "grad_norm": 1.8980493545532227, - "learning_rate": 3.2225051126552215e-05, - "loss": 0.6814, - "step": 120640 - }, - { - "epoch": 1.0665853356671795, - "grad_norm": 1.6585372686386108, - "learning_rate": 3.222357773888034e-05, - "loss": 0.6179, - "step": 120650 - }, - { - "epoch": 1.0666737389274916, - "grad_norm": 17.13709831237793, - "learning_rate": 3.222210435120848e-05, - "loss": 0.5893, - "step": 120660 - }, - { - "epoch": 1.066762142187804, - "grad_norm": 2.4513256549835205, - "learning_rate": 3.22206309635366e-05, - "loss": 0.5877, - "step": 120670 - }, - { - "epoch": 1.066850545448116, - "grad_norm": 9.410117149353027, - "learning_rate": 3.2219157575864735e-05, - "loss": 0.6185, - "step": 120680 - }, - { - "epoch": 1.0669389487084284, - "grad_norm": 2.3887970447540283, - "learning_rate": 3.221768418819286e-05, - "loss": 0.5749, - "step": 120690 - }, - { - "epoch": 1.0670273519687405, - "grad_norm": 5.723911762237549, - "learning_rate": 3.221621080052099e-05, - "loss": 0.6326, - "step": 120700 - }, - { - "epoch": 1.0671157552290529, - "grad_norm": 1.5306992530822754, - "learning_rate": 3.221473741284912e-05, - "loss": 0.6017, - "step": 120710 - }, - { - "epoch": 1.067204158489365, - "grad_norm": 2.6558194160461426, - "learning_rate": 3.221326402517725e-05, - "loss": 0.6607, - "step": 120720 - }, - { - "epoch": 1.0672925617496773, - "grad_norm": 2.473825693130493, - "learning_rate": 3.2211790637505377e-05, - "loss": 0.7037, - "step": 120730 - }, - { - "epoch": 1.0673809650099895, - "grad_norm": 12.26204776763916, - "learning_rate": 3.221031724983351e-05, - "loss": 0.6037, - "step": 120740 - }, - { - "epoch": 1.0674693682703018, - "grad_norm": 5.587930679321289, - "learning_rate": 3.220884386216163e-05, - "loss": 0.5959, - "step": 120750 - }, - { - "epoch": 1.0675577715306142, - "grad_norm": 3.8034064769744873, - "learning_rate": 3.220737047448977e-05, - "loss": 0.6826, - "step": 120760 - }, - { - "epoch": 1.0676461747909263, - "grad_norm": 3.043522596359253, - "learning_rate": 3.22058970868179e-05, - "loss": 0.5302, - "step": 120770 - }, - { - "epoch": 1.0677345780512386, - "grad_norm": 6.027188777923584, - "learning_rate": 3.2204423699146025e-05, - "loss": 0.6712, - "step": 120780 - }, - { - "epoch": 1.0678229813115507, - "grad_norm": 1.6273082494735718, - "learning_rate": 3.2202950311474153e-05, - "loss": 0.704, - "step": 120790 - }, - { - "epoch": 1.067911384571863, - "grad_norm": 2.3920822143554688, - "learning_rate": 3.220147692380229e-05, - "loss": 0.622, - "step": 120800 - }, - { - "epoch": 1.0679997878321752, - "grad_norm": 0.9818435311317444, - "learning_rate": 3.220000353613041e-05, - "loss": 0.6715, - "step": 120810 - }, - { - "epoch": 1.0680881910924875, - "grad_norm": 1.0270146131515503, - "learning_rate": 3.2198530148458545e-05, - "loss": 0.6102, - "step": 120820 - }, - { - "epoch": 1.0681765943527997, - "grad_norm": 2.4802002906799316, - "learning_rate": 3.219705676078667e-05, - "loss": 0.6699, - "step": 120830 - }, - { - "epoch": 1.068264997613112, - "grad_norm": 3.1002514362335205, - "learning_rate": 3.21955833731148e-05, - "loss": 0.5078, - "step": 120840 - }, - { - "epoch": 1.0683534008734241, - "grad_norm": 3.872596502304077, - "learning_rate": 3.219410998544293e-05, - "loss": 0.6268, - "step": 120850 - }, - { - "epoch": 1.0684418041337365, - "grad_norm": 3.8102264404296875, - "learning_rate": 3.219263659777106e-05, - "loss": 0.4891, - "step": 120860 - }, - { - "epoch": 1.0685302073940486, - "grad_norm": 6.440011501312256, - "learning_rate": 3.219116321009919e-05, - "loss": 0.5027, - "step": 120870 - }, - { - "epoch": 1.068618610654361, - "grad_norm": 5.2556986808776855, - "learning_rate": 3.218968982242732e-05, - "loss": 0.6085, - "step": 120880 - }, - { - "epoch": 1.0687070139146733, - "grad_norm": 7.90283203125, - "learning_rate": 3.2188216434755444e-05, - "loss": 0.6949, - "step": 120890 - }, - { - "epoch": 1.0687954171749854, - "grad_norm": 3.3118958473205566, - "learning_rate": 3.218674304708358e-05, - "loss": 0.5001, - "step": 120900 - }, - { - "epoch": 1.0688838204352977, - "grad_norm": 5.0742597579956055, - "learning_rate": 3.218526965941171e-05, - "loss": 0.7215, - "step": 120910 - }, - { - "epoch": 1.0689722236956098, - "grad_norm": 1.7520378828048706, - "learning_rate": 3.2183796271739836e-05, - "loss": 0.7059, - "step": 120920 - }, - { - "epoch": 1.0690606269559222, - "grad_norm": 3.5206902027130127, - "learning_rate": 3.2182322884067964e-05, - "loss": 0.5361, - "step": 120930 - }, - { - "epoch": 1.0691490302162343, - "grad_norm": 2.8200526237487793, - "learning_rate": 3.218084949639609e-05, - "loss": 0.5159, - "step": 120940 - }, - { - "epoch": 1.0692374334765467, - "grad_norm": 1.680497169494629, - "learning_rate": 3.217937610872422e-05, - "loss": 0.5674, - "step": 120950 - }, - { - "epoch": 1.0693258367368588, - "grad_norm": 5.961465358734131, - "learning_rate": 3.2177902721052356e-05, - "loss": 0.6492, - "step": 120960 - }, - { - "epoch": 1.0694142399971711, - "grad_norm": 7.21779203414917, - "learning_rate": 3.2176429333380484e-05, - "loss": 0.6107, - "step": 120970 - }, - { - "epoch": 1.0695026432574832, - "grad_norm": 2.5119082927703857, - "learning_rate": 3.217495594570861e-05, - "loss": 0.5784, - "step": 120980 - }, - { - "epoch": 1.0695910465177956, - "grad_norm": 3.025531053543091, - "learning_rate": 3.217348255803674e-05, - "loss": 0.7293, - "step": 120990 - }, - { - "epoch": 1.069679449778108, - "grad_norm": 2.1070396900177, - "learning_rate": 3.217200917036487e-05, - "loss": 0.4889, - "step": 121000 - }, - { - "epoch": 1.06976785303842, - "grad_norm": 2.1777422428131104, - "learning_rate": 3.2170535782693e-05, - "loss": 0.6259, - "step": 121010 - }, - { - "epoch": 1.0698562562987324, - "grad_norm": 1.2348896265029907, - "learning_rate": 3.216906239502113e-05, - "loss": 0.6916, - "step": 121020 - }, - { - "epoch": 1.0699446595590445, - "grad_norm": 3.8984742164611816, - "learning_rate": 3.216758900734926e-05, - "loss": 0.7222, - "step": 121030 - }, - { - "epoch": 1.0700330628193568, - "grad_norm": 2.697364091873169, - "learning_rate": 3.216611561967739e-05, - "loss": 0.6438, - "step": 121040 - }, - { - "epoch": 1.070121466079669, - "grad_norm": 0.9914762377738953, - "learning_rate": 3.216464223200552e-05, - "loss": 0.5099, - "step": 121050 - }, - { - "epoch": 1.0702098693399813, - "grad_norm": 16.497310638427734, - "learning_rate": 3.2163168844333646e-05, - "loss": 0.5632, - "step": 121060 - }, - { - "epoch": 1.0702982726002934, - "grad_norm": 1.8060559034347534, - "learning_rate": 3.2161695456661774e-05, - "loss": 0.7179, - "step": 121070 - }, - { - "epoch": 1.0703866758606058, - "grad_norm": 20.95323944091797, - "learning_rate": 3.21602220689899e-05, - "loss": 0.6801, - "step": 121080 - }, - { - "epoch": 1.0704750791209179, - "grad_norm": 2.695711851119995, - "learning_rate": 3.215874868131804e-05, - "loss": 0.5828, - "step": 121090 - }, - { - "epoch": 1.0705634823812302, - "grad_norm": 3.5927395820617676, - "learning_rate": 3.2157275293646166e-05, - "loss": 0.5339, - "step": 121100 - }, - { - "epoch": 1.0706518856415426, - "grad_norm": 6.041247844696045, - "learning_rate": 3.2155801905974295e-05, - "loss": 0.764, - "step": 121110 - }, - { - "epoch": 1.0707402889018547, - "grad_norm": 1.805579662322998, - "learning_rate": 3.215432851830242e-05, - "loss": 0.5147, - "step": 121120 - }, - { - "epoch": 1.070828692162167, - "grad_norm": 1.332554817199707, - "learning_rate": 3.215285513063055e-05, - "loss": 0.6448, - "step": 121130 - }, - { - "epoch": 1.0709170954224791, - "grad_norm": 2.044205665588379, - "learning_rate": 3.215138174295868e-05, - "loss": 0.5545, - "step": 121140 - }, - { - "epoch": 1.0710054986827915, - "grad_norm": 21.489187240600586, - "learning_rate": 3.2149908355286815e-05, - "loss": 0.6201, - "step": 121150 - }, - { - "epoch": 1.0710939019431036, - "grad_norm": 11.531546592712402, - "learning_rate": 3.214843496761494e-05, - "loss": 0.6901, - "step": 121160 - }, - { - "epoch": 1.071182305203416, - "grad_norm": 3.9804844856262207, - "learning_rate": 3.214696157994307e-05, - "loss": 0.7121, - "step": 121170 - }, - { - "epoch": 1.071270708463728, - "grad_norm": 2.379244565963745, - "learning_rate": 3.21454881922712e-05, - "loss": 0.5671, - "step": 121180 - }, - { - "epoch": 1.0713591117240404, - "grad_norm": 4.033192157745361, - "learning_rate": 3.214401480459933e-05, - "loss": 0.5707, - "step": 121190 - }, - { - "epoch": 1.0714475149843525, - "grad_norm": 2.3262979984283447, - "learning_rate": 3.214254141692746e-05, - "loss": 0.5273, - "step": 121200 - }, - { - "epoch": 1.0715359182446649, - "grad_norm": 1.0184017419815063, - "learning_rate": 3.214106802925559e-05, - "loss": 0.6661, - "step": 121210 - }, - { - "epoch": 1.0716243215049772, - "grad_norm": 1.3592736721038818, - "learning_rate": 3.213959464158371e-05, - "loss": 0.5844, - "step": 121220 - }, - { - "epoch": 1.0717127247652893, - "grad_norm": 5.208528518676758, - "learning_rate": 3.213812125391185e-05, - "loss": 0.6802, - "step": 121230 - }, - { - "epoch": 1.0718011280256017, - "grad_norm": 1.4978290796279907, - "learning_rate": 3.213664786623998e-05, - "loss": 0.5807, - "step": 121240 - }, - { - "epoch": 1.0718895312859138, - "grad_norm": 1.2481811046600342, - "learning_rate": 3.2135174478568105e-05, - "loss": 0.4375, - "step": 121250 - }, - { - "epoch": 1.0719779345462261, - "grad_norm": 2.0746304988861084, - "learning_rate": 3.2133701090896234e-05, - "loss": 0.595, - "step": 121260 - }, - { - "epoch": 1.0720663378065383, - "grad_norm": 1.6982258558273315, - "learning_rate": 3.213222770322437e-05, - "loss": 0.7717, - "step": 121270 - }, - { - "epoch": 1.0721547410668506, - "grad_norm": 5.13646936416626, - "learning_rate": 3.213075431555249e-05, - "loss": 0.6305, - "step": 121280 - }, - { - "epoch": 1.0722431443271627, - "grad_norm": 10.087930679321289, - "learning_rate": 3.2129280927880625e-05, - "loss": 0.5816, - "step": 121290 - }, - { - "epoch": 1.072331547587475, - "grad_norm": 5.895126819610596, - "learning_rate": 3.212780754020875e-05, - "loss": 0.5815, - "step": 121300 - }, - { - "epoch": 1.0724199508477872, - "grad_norm": 4.897123336791992, - "learning_rate": 3.212633415253688e-05, - "loss": 0.7048, - "step": 121310 - }, - { - "epoch": 1.0725083541080995, - "grad_norm": 1.7410991191864014, - "learning_rate": 3.212486076486501e-05, - "loss": 0.5557, - "step": 121320 - }, - { - "epoch": 1.0725967573684116, - "grad_norm": 13.142781257629395, - "learning_rate": 3.212338737719314e-05, - "loss": 0.3966, - "step": 121330 - }, - { - "epoch": 1.072685160628724, - "grad_norm": 9.509994506835938, - "learning_rate": 3.212191398952127e-05, - "loss": 0.5738, - "step": 121340 - }, - { - "epoch": 1.0727735638890363, - "grad_norm": 1.4094538688659668, - "learning_rate": 3.21204406018494e-05, - "loss": 0.5842, - "step": 121350 - }, - { - "epoch": 1.0728619671493484, - "grad_norm": 1.9394299983978271, - "learning_rate": 3.2118967214177524e-05, - "loss": 0.4725, - "step": 121360 - }, - { - "epoch": 1.0729503704096608, - "grad_norm": 4.592030048370361, - "learning_rate": 3.211749382650566e-05, - "loss": 0.5502, - "step": 121370 - }, - { - "epoch": 1.073038773669973, - "grad_norm": 3.1156632900238037, - "learning_rate": 3.211602043883379e-05, - "loss": 0.6149, - "step": 121380 - }, - { - "epoch": 1.0731271769302853, - "grad_norm": 5.135519981384277, - "learning_rate": 3.2114547051161916e-05, - "loss": 0.756, - "step": 121390 - }, - { - "epoch": 1.0732155801905974, - "grad_norm": 2.4875688552856445, - "learning_rate": 3.2113073663490044e-05, - "loss": 0.6142, - "step": 121400 - }, - { - "epoch": 1.0733039834509097, - "grad_norm": 5.848193645477295, - "learning_rate": 3.211160027581817e-05, - "loss": 0.6173, - "step": 121410 - }, - { - "epoch": 1.0733923867112218, - "grad_norm": 4.377786636352539, - "learning_rate": 3.21101268881463e-05, - "loss": 0.6957, - "step": 121420 - }, - { - "epoch": 1.0734807899715342, - "grad_norm": 3.9862008094787598, - "learning_rate": 3.2108653500474436e-05, - "loss": 0.6682, - "step": 121430 - }, - { - "epoch": 1.0735691932318463, - "grad_norm": 3.1569674015045166, - "learning_rate": 3.210718011280256e-05, - "loss": 0.4815, - "step": 121440 - }, - { - "epoch": 1.0736575964921586, - "grad_norm": 2.537647008895874, - "learning_rate": 3.210570672513069e-05, - "loss": 0.6763, - "step": 121450 - }, - { - "epoch": 1.0737459997524708, - "grad_norm": 11.374505043029785, - "learning_rate": 3.210423333745882e-05, - "loss": 0.7222, - "step": 121460 - }, - { - "epoch": 1.073834403012783, - "grad_norm": 5.498156547546387, - "learning_rate": 3.210275994978695e-05, - "loss": 0.6443, - "step": 121470 - }, - { - "epoch": 1.0739228062730954, - "grad_norm": 2.8872296810150146, - "learning_rate": 3.210128656211508e-05, - "loss": 0.6018, - "step": 121480 - }, - { - "epoch": 1.0740112095334076, - "grad_norm": 4.210776329040527, - "learning_rate": 3.209981317444321e-05, - "loss": 0.6167, - "step": 121490 - }, - { - "epoch": 1.07409961279372, - "grad_norm": 7.615468502044678, - "learning_rate": 3.2098339786771334e-05, - "loss": 0.5823, - "step": 121500 - }, - { - "epoch": 1.074188016054032, - "grad_norm": 2.6012418270111084, - "learning_rate": 3.209686639909947e-05, - "loss": 0.6672, - "step": 121510 - }, - { - "epoch": 1.0742764193143444, - "grad_norm": 5.401417255401611, - "learning_rate": 3.209539301142759e-05, - "loss": 0.564, - "step": 121520 - }, - { - "epoch": 1.0743648225746565, - "grad_norm": 1.6750456094741821, - "learning_rate": 3.2093919623755726e-05, - "loss": 0.5829, - "step": 121530 - }, - { - "epoch": 1.0744532258349688, - "grad_norm": 4.977668762207031, - "learning_rate": 3.2092446236083855e-05, - "loss": 0.7314, - "step": 121540 - }, - { - "epoch": 1.074541629095281, - "grad_norm": 2.7716176509857178, - "learning_rate": 3.209097284841198e-05, - "loss": 0.5461, - "step": 121550 - }, - { - "epoch": 1.0746300323555933, - "grad_norm": 1.9521986246109009, - "learning_rate": 3.208949946074011e-05, - "loss": 0.5778, - "step": 121560 - }, - { - "epoch": 1.0747184356159054, - "grad_norm": 3.8622937202453613, - "learning_rate": 3.2088026073068246e-05, - "loss": 0.6507, - "step": 121570 - }, - { - "epoch": 1.0748068388762178, - "grad_norm": 5.4770050048828125, - "learning_rate": 3.208655268539637e-05, - "loss": 0.806, - "step": 121580 - }, - { - "epoch": 1.07489524213653, - "grad_norm": 3.598459482192993, - "learning_rate": 3.20850792977245e-05, - "loss": 0.6434, - "step": 121590 - }, - { - "epoch": 1.0749836453968422, - "grad_norm": 1.5761405229568481, - "learning_rate": 3.208360591005263e-05, - "loss": 0.6724, - "step": 121600 - }, - { - "epoch": 1.0750720486571546, - "grad_norm": 1.326784372329712, - "learning_rate": 3.208213252238076e-05, - "loss": 0.5272, - "step": 121610 - }, - { - "epoch": 1.0751604519174667, - "grad_norm": 2.9855453968048096, - "learning_rate": 3.208065913470889e-05, - "loss": 0.6267, - "step": 121620 - }, - { - "epoch": 1.075248855177779, - "grad_norm": 3.084684133529663, - "learning_rate": 3.207918574703702e-05, - "loss": 0.5896, - "step": 121630 - }, - { - "epoch": 1.0753372584380911, - "grad_norm": 1.775133728981018, - "learning_rate": 3.2077712359365145e-05, - "loss": 0.6063, - "step": 121640 - }, - { - "epoch": 1.0754256616984035, - "grad_norm": 1.709446668624878, - "learning_rate": 3.207623897169328e-05, - "loss": 0.6196, - "step": 121650 - }, - { - "epoch": 1.0755140649587156, - "grad_norm": 4.275025367736816, - "learning_rate": 3.20747655840214e-05, - "loss": 0.7465, - "step": 121660 - }, - { - "epoch": 1.075602468219028, - "grad_norm": 11.366464614868164, - "learning_rate": 3.207329219634954e-05, - "loss": 0.5256, - "step": 121670 - }, - { - "epoch": 1.07569087147934, - "grad_norm": 3.131870985031128, - "learning_rate": 3.2071818808677665e-05, - "loss": 0.5486, - "step": 121680 - }, - { - "epoch": 1.0757792747396524, - "grad_norm": 1.2570112943649292, - "learning_rate": 3.2070345421005793e-05, - "loss": 0.6025, - "step": 121690 - }, - { - "epoch": 1.0758676779999647, - "grad_norm": 4.750698089599609, - "learning_rate": 3.206887203333392e-05, - "loss": 0.6556, - "step": 121700 - }, - { - "epoch": 1.0759560812602769, - "grad_norm": 8.063404083251953, - "learning_rate": 3.206739864566206e-05, - "loss": 0.726, - "step": 121710 - }, - { - "epoch": 1.0760444845205892, - "grad_norm": 1.368151307106018, - "learning_rate": 3.206592525799018e-05, - "loss": 0.7945, - "step": 121720 - }, - { - "epoch": 1.0761328877809013, - "grad_norm": 7.499267101287842, - "learning_rate": 3.2064451870318314e-05, - "loss": 0.5429, - "step": 121730 - }, - { - "epoch": 1.0762212910412137, - "grad_norm": 1.5975914001464844, - "learning_rate": 3.206297848264644e-05, - "loss": 0.5326, - "step": 121740 - }, - { - "epoch": 1.0763096943015258, - "grad_norm": 13.910560607910156, - "learning_rate": 3.206150509497457e-05, - "loss": 0.737, - "step": 121750 - }, - { - "epoch": 1.0763980975618381, - "grad_norm": 6.845784664154053, - "learning_rate": 3.20600317073027e-05, - "loss": 0.6227, - "step": 121760 - }, - { - "epoch": 1.0764865008221502, - "grad_norm": 1.2261799573898315, - "learning_rate": 3.205855831963083e-05, - "loss": 0.7177, - "step": 121770 - }, - { - "epoch": 1.0765749040824626, - "grad_norm": 2.2650344371795654, - "learning_rate": 3.2057084931958955e-05, - "loss": 0.5767, - "step": 121780 - }, - { - "epoch": 1.0766633073427747, - "grad_norm": 3.7625882625579834, - "learning_rate": 3.205561154428709e-05, - "loss": 0.6843, - "step": 121790 - }, - { - "epoch": 1.076751710603087, - "grad_norm": 11.173534393310547, - "learning_rate": 3.205413815661521e-05, - "loss": 0.5714, - "step": 121800 - }, - { - "epoch": 1.0768401138633994, - "grad_norm": 0.8792701363563538, - "learning_rate": 3.205266476894335e-05, - "loss": 0.5123, - "step": 121810 - }, - { - "epoch": 1.0769285171237115, - "grad_norm": 5.793383598327637, - "learning_rate": 3.2051191381271476e-05, - "loss": 0.7082, - "step": 121820 - }, - { - "epoch": 1.0770169203840239, - "grad_norm": 1.818489909172058, - "learning_rate": 3.2049717993599604e-05, - "loss": 0.7048, - "step": 121830 - }, - { - "epoch": 1.077105323644336, - "grad_norm": 6.321500301361084, - "learning_rate": 3.204824460592773e-05, - "loss": 0.7024, - "step": 121840 - }, - { - "epoch": 1.0771937269046483, - "grad_norm": 1.1618351936340332, - "learning_rate": 3.204677121825587e-05, - "loss": 0.5693, - "step": 121850 - }, - { - "epoch": 1.0772821301649604, - "grad_norm": 6.2675628662109375, - "learning_rate": 3.204529783058399e-05, - "loss": 0.5162, - "step": 121860 - }, - { - "epoch": 1.0773705334252728, - "grad_norm": 16.915861129760742, - "learning_rate": 3.2043824442912124e-05, - "loss": 0.7185, - "step": 121870 - }, - { - "epoch": 1.077458936685585, - "grad_norm": 2.535393714904785, - "learning_rate": 3.204235105524025e-05, - "loss": 0.5321, - "step": 121880 - }, - { - "epoch": 1.0775473399458972, - "grad_norm": 3.225973129272461, - "learning_rate": 3.204087766756838e-05, - "loss": 0.7294, - "step": 121890 - }, - { - "epoch": 1.0776357432062094, - "grad_norm": 10.800483703613281, - "learning_rate": 3.203940427989651e-05, - "loss": 0.511, - "step": 121900 - }, - { - "epoch": 1.0777241464665217, - "grad_norm": 4.964085102081299, - "learning_rate": 3.203793089222464e-05, - "loss": 0.6674, - "step": 121910 - }, - { - "epoch": 1.0778125497268338, - "grad_norm": 5.181891918182373, - "learning_rate": 3.203645750455277e-05, - "loss": 0.6324, - "step": 121920 - }, - { - "epoch": 1.0779009529871462, - "grad_norm": 4.374396324157715, - "learning_rate": 3.20349841168809e-05, - "loss": 0.576, - "step": 121930 - }, - { - "epoch": 1.0779893562474585, - "grad_norm": 3.8809781074523926, - "learning_rate": 3.203351072920903e-05, - "loss": 0.7083, - "step": 121940 - }, - { - "epoch": 1.0780777595077706, - "grad_norm": 2.152778148651123, - "learning_rate": 3.203203734153716e-05, - "loss": 0.6069, - "step": 121950 - }, - { - "epoch": 1.078166162768083, - "grad_norm": 2.129201889038086, - "learning_rate": 3.2030563953865286e-05, - "loss": 0.6123, - "step": 121960 - }, - { - "epoch": 1.078254566028395, - "grad_norm": 2.8653624057769775, - "learning_rate": 3.2029090566193414e-05, - "loss": 0.724, - "step": 121970 - }, - { - "epoch": 1.0783429692887074, - "grad_norm": 11.684548377990723, - "learning_rate": 3.202761717852155e-05, - "loss": 0.7077, - "step": 121980 - }, - { - "epoch": 1.0784313725490196, - "grad_norm": 1.9912139177322388, - "learning_rate": 3.202614379084967e-05, - "loss": 0.625, - "step": 121990 - }, - { - "epoch": 1.078519775809332, - "grad_norm": 2.9863288402557373, - "learning_rate": 3.2024670403177806e-05, - "loss": 0.6493, - "step": 122000 - }, - { - "epoch": 1.078608179069644, - "grad_norm": 2.803328514099121, - "learning_rate": 3.2023197015505935e-05, - "loss": 0.6274, - "step": 122010 - }, - { - "epoch": 1.0786965823299564, - "grad_norm": 1.2812105417251587, - "learning_rate": 3.202172362783406e-05, - "loss": 0.6034, - "step": 122020 - }, - { - "epoch": 1.0787849855902685, - "grad_norm": 7.073108673095703, - "learning_rate": 3.202025024016219e-05, - "loss": 0.6396, - "step": 122030 - }, - { - "epoch": 1.0788733888505808, - "grad_norm": 16.532384872436523, - "learning_rate": 3.2018776852490326e-05, - "loss": 0.7696, - "step": 122040 - }, - { - "epoch": 1.078961792110893, - "grad_norm": 3.5699141025543213, - "learning_rate": 3.201730346481845e-05, - "loss": 0.5237, - "step": 122050 - }, - { - "epoch": 1.0790501953712053, - "grad_norm": 1.8562746047973633, - "learning_rate": 3.201583007714658e-05, - "loss": 0.5993, - "step": 122060 - }, - { - "epoch": 1.0791385986315176, - "grad_norm": 4.66828727722168, - "learning_rate": 3.201435668947471e-05, - "loss": 0.7343, - "step": 122070 - }, - { - "epoch": 1.0792270018918297, - "grad_norm": 1.4367125034332275, - "learning_rate": 3.201288330180284e-05, - "loss": 0.5483, - "step": 122080 - }, - { - "epoch": 1.079315405152142, - "grad_norm": 0.9155851006507874, - "learning_rate": 3.201140991413097e-05, - "loss": 0.6584, - "step": 122090 - }, - { - "epoch": 1.0794038084124542, - "grad_norm": 2.3527088165283203, - "learning_rate": 3.20099365264591e-05, - "loss": 0.5648, - "step": 122100 - }, - { - "epoch": 1.0794922116727665, - "grad_norm": 2.074800968170166, - "learning_rate": 3.2008463138787225e-05, - "loss": 0.588, - "step": 122110 - }, - { - "epoch": 1.0795806149330787, - "grad_norm": 1.8037925958633423, - "learning_rate": 3.200698975111536e-05, - "loss": 0.7356, - "step": 122120 - }, - { - "epoch": 1.079669018193391, - "grad_norm": 2.0469555854797363, - "learning_rate": 3.200551636344348e-05, - "loss": 0.5375, - "step": 122130 - }, - { - "epoch": 1.0797574214537031, - "grad_norm": 6.390101432800293, - "learning_rate": 3.200404297577162e-05, - "loss": 0.6544, - "step": 122140 - }, - { - "epoch": 1.0798458247140155, - "grad_norm": 1.8118375539779663, - "learning_rate": 3.2002569588099745e-05, - "loss": 0.5123, - "step": 122150 - }, - { - "epoch": 1.0799342279743276, - "grad_norm": 8.044949531555176, - "learning_rate": 3.2001096200427873e-05, - "loss": 0.6148, - "step": 122160 - }, - { - "epoch": 1.08002263123464, - "grad_norm": 1.4586153030395508, - "learning_rate": 3.1999622812756e-05, - "loss": 0.6151, - "step": 122170 - }, - { - "epoch": 1.0801110344949523, - "grad_norm": 1.2313237190246582, - "learning_rate": 3.199814942508414e-05, - "loss": 0.4891, - "step": 122180 - }, - { - "epoch": 1.0801994377552644, - "grad_norm": 1.6114076375961304, - "learning_rate": 3.199667603741226e-05, - "loss": 0.4885, - "step": 122190 - }, - { - "epoch": 1.0802878410155767, - "grad_norm": 2.5828280448913574, - "learning_rate": 3.1995202649740394e-05, - "loss": 0.7361, - "step": 122200 - }, - { - "epoch": 1.0803762442758889, - "grad_norm": 1.6665910482406616, - "learning_rate": 3.199372926206852e-05, - "loss": 0.5084, - "step": 122210 - }, - { - "epoch": 1.0804646475362012, - "grad_norm": 1.9073121547698975, - "learning_rate": 3.199225587439665e-05, - "loss": 0.5759, - "step": 122220 - }, - { - "epoch": 1.0805530507965133, - "grad_norm": 2.49973201751709, - "learning_rate": 3.199078248672478e-05, - "loss": 0.6122, - "step": 122230 - }, - { - "epoch": 1.0806414540568257, - "grad_norm": 3.6795437335968018, - "learning_rate": 3.198930909905291e-05, - "loss": 0.6624, - "step": 122240 - }, - { - "epoch": 1.0807298573171378, - "grad_norm": 2.7021732330322266, - "learning_rate": 3.1987835711381035e-05, - "loss": 0.5861, - "step": 122250 - }, - { - "epoch": 1.0808182605774501, - "grad_norm": 2.5378150939941406, - "learning_rate": 3.198636232370917e-05, - "loss": 0.6666, - "step": 122260 - }, - { - "epoch": 1.0809066638377622, - "grad_norm": 3.60380482673645, - "learning_rate": 3.198488893603729e-05, - "loss": 0.6491, - "step": 122270 - }, - { - "epoch": 1.0809950670980746, - "grad_norm": 2.004743814468384, - "learning_rate": 3.198341554836543e-05, - "loss": 0.6239, - "step": 122280 - }, - { - "epoch": 1.081083470358387, - "grad_norm": 1.2397043704986572, - "learning_rate": 3.1981942160693556e-05, - "loss": 0.5916, - "step": 122290 - }, - { - "epoch": 1.081171873618699, - "grad_norm": 0.6310939788818359, - "learning_rate": 3.1980468773021684e-05, - "loss": 0.5, - "step": 122300 - }, - { - "epoch": 1.0812602768790114, - "grad_norm": 2.923029661178589, - "learning_rate": 3.197899538534981e-05, - "loss": 0.5786, - "step": 122310 - }, - { - "epoch": 1.0813486801393235, - "grad_norm": 2.1095168590545654, - "learning_rate": 3.197752199767795e-05, - "loss": 0.6065, - "step": 122320 - }, - { - "epoch": 1.0814370833996358, - "grad_norm": 2.7881975173950195, - "learning_rate": 3.197604861000607e-05, - "loss": 0.6355, - "step": 122330 - }, - { - "epoch": 1.081525486659948, - "grad_norm": 0.8852866888046265, - "learning_rate": 3.1974575222334204e-05, - "loss": 0.6103, - "step": 122340 - }, - { - "epoch": 1.0816138899202603, - "grad_norm": 8.771408081054688, - "learning_rate": 3.1973101834662326e-05, - "loss": 0.7937, - "step": 122350 - }, - { - "epoch": 1.0817022931805724, - "grad_norm": 6.1663618087768555, - "learning_rate": 3.197162844699046e-05, - "loss": 0.5049, - "step": 122360 - }, - { - "epoch": 1.0817906964408848, - "grad_norm": 8.269588470458984, - "learning_rate": 3.197015505931859e-05, - "loss": 0.6602, - "step": 122370 - }, - { - "epoch": 1.081879099701197, - "grad_norm": 1.1021735668182373, - "learning_rate": 3.196868167164672e-05, - "loss": 0.553, - "step": 122380 - }, - { - "epoch": 1.0819675029615092, - "grad_norm": 1.3458079099655151, - "learning_rate": 3.1967208283974846e-05, - "loss": 0.4639, - "step": 122390 - }, - { - "epoch": 1.0820559062218216, - "grad_norm": 18.411046981811523, - "learning_rate": 3.196573489630298e-05, - "loss": 0.6744, - "step": 122400 - }, - { - "epoch": 1.0821443094821337, - "grad_norm": 3.0645389556884766, - "learning_rate": 3.19642615086311e-05, - "loss": 0.5915, - "step": 122410 - }, - { - "epoch": 1.082232712742446, - "grad_norm": 3.1344542503356934, - "learning_rate": 3.196278812095924e-05, - "loss": 0.7577, - "step": 122420 - }, - { - "epoch": 1.0823211160027582, - "grad_norm": 1.5618627071380615, - "learning_rate": 3.1961314733287366e-05, - "loss": 0.6805, - "step": 122430 - }, - { - "epoch": 1.0824095192630705, - "grad_norm": 2.5196473598480225, - "learning_rate": 3.1959841345615495e-05, - "loss": 0.6196, - "step": 122440 - }, - { - "epoch": 1.0824979225233826, - "grad_norm": 2.6964004039764404, - "learning_rate": 3.195836795794362e-05, - "loss": 0.6674, - "step": 122450 - }, - { - "epoch": 1.082586325783695, - "grad_norm": 3.5802650451660156, - "learning_rate": 3.195689457027176e-05, - "loss": 0.6871, - "step": 122460 - }, - { - "epoch": 1.082674729044007, - "grad_norm": 2.1361007690429688, - "learning_rate": 3.195542118259988e-05, - "loss": 0.5687, - "step": 122470 - }, - { - "epoch": 1.0827631323043194, - "grad_norm": 2.30440354347229, - "learning_rate": 3.1953947794928015e-05, - "loss": 0.5267, - "step": 122480 - }, - { - "epoch": 1.0828515355646315, - "grad_norm": 1.0843998193740845, - "learning_rate": 3.1952474407256136e-05, - "loss": 0.5372, - "step": 122490 - }, - { - "epoch": 1.0829399388249439, - "grad_norm": 1.6311516761779785, - "learning_rate": 3.195100101958427e-05, - "loss": 0.6458, - "step": 122500 - }, - { - "epoch": 1.083028342085256, - "grad_norm": 5.391809940338135, - "learning_rate": 3.19495276319124e-05, - "loss": 0.6708, - "step": 122510 - }, - { - "epoch": 1.0831167453455683, - "grad_norm": 2.112456798553467, - "learning_rate": 3.194805424424053e-05, - "loss": 0.5991, - "step": 122520 - }, - { - "epoch": 1.0832051486058807, - "grad_norm": 2.9221577644348145, - "learning_rate": 3.1946580856568656e-05, - "loss": 0.7105, - "step": 122530 - }, - { - "epoch": 1.0832935518661928, - "grad_norm": 2.363945722579956, - "learning_rate": 3.194510746889679e-05, - "loss": 0.5106, - "step": 122540 - }, - { - "epoch": 1.0833819551265051, - "grad_norm": 57.95821762084961, - "learning_rate": 3.194363408122491e-05, - "loss": 0.6824, - "step": 122550 - }, - { - "epoch": 1.0834703583868173, - "grad_norm": 2.919809579849243, - "learning_rate": 3.194216069355305e-05, - "loss": 0.614, - "step": 122560 - }, - { - "epoch": 1.0835587616471296, - "grad_norm": 4.594418048858643, - "learning_rate": 3.194068730588118e-05, - "loss": 0.6027, - "step": 122570 - }, - { - "epoch": 1.0836471649074417, - "grad_norm": 6.151988983154297, - "learning_rate": 3.1939213918209305e-05, - "loss": 0.611, - "step": 122580 - }, - { - "epoch": 1.083735568167754, - "grad_norm": 1.7607015371322632, - "learning_rate": 3.193774053053743e-05, - "loss": 0.6062, - "step": 122590 - }, - { - "epoch": 1.0838239714280662, - "grad_norm": 1.4993444681167603, - "learning_rate": 3.193626714286556e-05, - "loss": 0.551, - "step": 122600 - }, - { - "epoch": 1.0839123746883785, - "grad_norm": 1.3588351011276245, - "learning_rate": 3.193479375519369e-05, - "loss": 0.6519, - "step": 122610 - }, - { - "epoch": 1.0840007779486907, - "grad_norm": 1.543945550918579, - "learning_rate": 3.1933320367521825e-05, - "loss": 0.621, - "step": 122620 - }, - { - "epoch": 1.084089181209003, - "grad_norm": 2.5363752841949463, - "learning_rate": 3.193184697984995e-05, - "loss": 0.6891, - "step": 122630 - }, - { - "epoch": 1.0841775844693151, - "grad_norm": 3.4957563877105713, - "learning_rate": 3.193037359217808e-05, - "loss": 0.4635, - "step": 122640 - }, - { - "epoch": 1.0842659877296275, - "grad_norm": 1.6638953685760498, - "learning_rate": 3.192890020450621e-05, - "loss": 0.467, - "step": 122650 - }, - { - "epoch": 1.0843543909899398, - "grad_norm": 3.1594300270080566, - "learning_rate": 3.192742681683434e-05, - "loss": 0.6271, - "step": 122660 - }, - { - "epoch": 1.084442794250252, - "grad_norm": 4.568197727203369, - "learning_rate": 3.192595342916247e-05, - "loss": 0.5805, - "step": 122670 - }, - { - "epoch": 1.0845311975105643, - "grad_norm": 3.5797080993652344, - "learning_rate": 3.19244800414906e-05, - "loss": 0.5598, - "step": 122680 - }, - { - "epoch": 1.0846196007708764, - "grad_norm": 3.7258315086364746, - "learning_rate": 3.1923006653818724e-05, - "loss": 0.5602, - "step": 122690 - }, - { - "epoch": 1.0847080040311887, - "grad_norm": 5.744478225708008, - "learning_rate": 3.192153326614686e-05, - "loss": 0.6259, - "step": 122700 - }, - { - "epoch": 1.0847964072915008, - "grad_norm": 1.917878270149231, - "learning_rate": 3.192005987847499e-05, - "loss": 0.6345, - "step": 122710 - }, - { - "epoch": 1.0848848105518132, - "grad_norm": 7.340816497802734, - "learning_rate": 3.1918586490803116e-05, - "loss": 0.7264, - "step": 122720 - }, - { - "epoch": 1.0849732138121253, - "grad_norm": 1.8755451440811157, - "learning_rate": 3.1917113103131244e-05, - "loss": 0.6663, - "step": 122730 - }, - { - "epoch": 1.0850616170724376, - "grad_norm": 7.353437900543213, - "learning_rate": 3.191563971545937e-05, - "loss": 0.5963, - "step": 122740 - }, - { - "epoch": 1.0851500203327498, - "grad_norm": 1.9176220893859863, - "learning_rate": 3.19141663277875e-05, - "loss": 0.5429, - "step": 122750 - }, - { - "epoch": 1.085238423593062, - "grad_norm": 1.859707236289978, - "learning_rate": 3.1912692940115636e-05, - "loss": 0.67, - "step": 122760 - }, - { - "epoch": 1.0853268268533744, - "grad_norm": 1.7249350547790527, - "learning_rate": 3.1911219552443764e-05, - "loss": 0.5487, - "step": 122770 - }, - { - "epoch": 1.0854152301136866, - "grad_norm": 6.912788391113281, - "learning_rate": 3.190974616477189e-05, - "loss": 0.7496, - "step": 122780 - }, - { - "epoch": 1.085503633373999, - "grad_norm": 1.5615384578704834, - "learning_rate": 3.190827277710002e-05, - "loss": 0.5046, - "step": 122790 - }, - { - "epoch": 1.085592036634311, - "grad_norm": 2.0725605487823486, - "learning_rate": 3.190679938942815e-05, - "loss": 0.5555, - "step": 122800 - }, - { - "epoch": 1.0856804398946234, - "grad_norm": 4.006580352783203, - "learning_rate": 3.190532600175628e-05, - "loss": 0.6636, - "step": 122810 - }, - { - "epoch": 1.0857688431549355, - "grad_norm": 6.023318290710449, - "learning_rate": 3.1903852614084406e-05, - "loss": 0.696, - "step": 122820 - }, - { - "epoch": 1.0858572464152478, - "grad_norm": 4.691527843475342, - "learning_rate": 3.190237922641254e-05, - "loss": 0.6718, - "step": 122830 - }, - { - "epoch": 1.08594564967556, - "grad_norm": 2.462649345397949, - "learning_rate": 3.190090583874067e-05, - "loss": 0.5945, - "step": 122840 - }, - { - "epoch": 1.0860340529358723, - "grad_norm": 3.252864122390747, - "learning_rate": 3.18994324510688e-05, - "loss": 0.8285, - "step": 122850 - }, - { - "epoch": 1.0861224561961844, - "grad_norm": 1.8932931423187256, - "learning_rate": 3.1897959063396926e-05, - "loss": 0.6723, - "step": 122860 - }, - { - "epoch": 1.0862108594564968, - "grad_norm": 4.223151683807373, - "learning_rate": 3.1896485675725054e-05, - "loss": 0.6034, - "step": 122870 - }, - { - "epoch": 1.086299262716809, - "grad_norm": 5.50346565246582, - "learning_rate": 3.189501228805318e-05, - "loss": 0.7297, - "step": 122880 - }, - { - "epoch": 1.0863876659771212, - "grad_norm": 4.345876216888428, - "learning_rate": 3.189353890038132e-05, - "loss": 0.6009, - "step": 122890 - }, - { - "epoch": 1.0864760692374336, - "grad_norm": 4.005703926086426, - "learning_rate": 3.1892065512709446e-05, - "loss": 0.7488, - "step": 122900 - }, - { - "epoch": 1.0865644724977457, - "grad_norm": 9.135722160339355, - "learning_rate": 3.1890592125037575e-05, - "loss": 0.6311, - "step": 122910 - }, - { - "epoch": 1.086652875758058, - "grad_norm": 9.949433326721191, - "learning_rate": 3.18891187373657e-05, - "loss": 0.6459, - "step": 122920 - }, - { - "epoch": 1.0867412790183701, - "grad_norm": 2.5519168376922607, - "learning_rate": 3.188764534969383e-05, - "loss": 0.5789, - "step": 122930 - }, - { - "epoch": 1.0868296822786825, - "grad_norm": 1.693414330482483, - "learning_rate": 3.188617196202196e-05, - "loss": 0.6737, - "step": 122940 - }, - { - "epoch": 1.0869180855389946, - "grad_norm": 3.470093011856079, - "learning_rate": 3.1884698574350095e-05, - "loss": 0.5935, - "step": 122950 - }, - { - "epoch": 1.087006488799307, - "grad_norm": 2.216045379638672, - "learning_rate": 3.1883225186678216e-05, - "loss": 0.7899, - "step": 122960 - }, - { - "epoch": 1.087094892059619, - "grad_norm": 1.5903863906860352, - "learning_rate": 3.188175179900635e-05, - "loss": 0.5255, - "step": 122970 - }, - { - "epoch": 1.0871832953199314, - "grad_norm": 4.212559223175049, - "learning_rate": 3.188027841133448e-05, - "loss": 0.6146, - "step": 122980 - }, - { - "epoch": 1.0872716985802438, - "grad_norm": 20.852306365966797, - "learning_rate": 3.187880502366261e-05, - "loss": 0.6694, - "step": 122990 - }, - { - "epoch": 1.0873601018405559, - "grad_norm": 5.608232498168945, - "learning_rate": 3.1877331635990737e-05, - "loss": 0.6996, - "step": 123000 - }, - { - "epoch": 1.0874485051008682, - "grad_norm": 3.7173542976379395, - "learning_rate": 3.187585824831887e-05, - "loss": 0.6763, - "step": 123010 - }, - { - "epoch": 1.0875369083611803, - "grad_norm": 2.7234785556793213, - "learning_rate": 3.187438486064699e-05, - "loss": 0.632, - "step": 123020 - }, - { - "epoch": 1.0876253116214927, - "grad_norm": 4.820131778717041, - "learning_rate": 3.187291147297513e-05, - "loss": 0.5988, - "step": 123030 - }, - { - "epoch": 1.0877137148818048, - "grad_norm": 1.576489806175232, - "learning_rate": 3.187143808530326e-05, - "loss": 0.5666, - "step": 123040 - }, - { - "epoch": 1.0878021181421171, - "grad_norm": 9.27625560760498, - "learning_rate": 3.1869964697631385e-05, - "loss": 0.6994, - "step": 123050 - }, - { - "epoch": 1.0878905214024293, - "grad_norm": 1.7611716985702515, - "learning_rate": 3.1868491309959513e-05, - "loss": 0.5743, - "step": 123060 - }, - { - "epoch": 1.0879789246627416, - "grad_norm": 1.4599748849868774, - "learning_rate": 3.186701792228764e-05, - "loss": 0.6491, - "step": 123070 - }, - { - "epoch": 1.0880673279230537, - "grad_norm": 6.349857807159424, - "learning_rate": 3.186554453461577e-05, - "loss": 0.6364, - "step": 123080 - }, - { - "epoch": 1.088155731183366, - "grad_norm": 1.1519315242767334, - "learning_rate": 3.1864071146943905e-05, - "loss": 0.4621, - "step": 123090 - }, - { - "epoch": 1.0882441344436782, - "grad_norm": 1.7422294616699219, - "learning_rate": 3.186259775927203e-05, - "loss": 0.6687, - "step": 123100 - }, - { - "epoch": 1.0883325377039905, - "grad_norm": 2.3867409229278564, - "learning_rate": 3.186112437160016e-05, - "loss": 0.6195, - "step": 123110 - }, - { - "epoch": 1.0884209409643029, - "grad_norm": 3.232933282852173, - "learning_rate": 3.185965098392829e-05, - "loss": 0.5636, - "step": 123120 - }, - { - "epoch": 1.088509344224615, - "grad_norm": 3.027165651321411, - "learning_rate": 3.185817759625642e-05, - "loss": 0.5069, - "step": 123130 - }, - { - "epoch": 1.0885977474849273, - "grad_norm": 3.1634950637817383, - "learning_rate": 3.185670420858455e-05, - "loss": 0.5984, - "step": 123140 - }, - { - "epoch": 1.0886861507452394, - "grad_norm": 5.46713924407959, - "learning_rate": 3.185523082091268e-05, - "loss": 0.6881, - "step": 123150 - }, - { - "epoch": 1.0887745540055518, - "grad_norm": 1.8680810928344727, - "learning_rate": 3.1853757433240804e-05, - "loss": 0.6447, - "step": 123160 - }, - { - "epoch": 1.088862957265864, - "grad_norm": 1.7547731399536133, - "learning_rate": 3.185228404556894e-05, - "loss": 0.7225, - "step": 123170 - }, - { - "epoch": 1.0889513605261762, - "grad_norm": 2.694031238555908, - "learning_rate": 3.185081065789706e-05, - "loss": 0.6521, - "step": 123180 - }, - { - "epoch": 1.0890397637864884, - "grad_norm": 3.2728137969970703, - "learning_rate": 3.1849337270225196e-05, - "loss": 0.681, - "step": 123190 - }, - { - "epoch": 1.0891281670468007, - "grad_norm": 11.53569507598877, - "learning_rate": 3.1847863882553324e-05, - "loss": 0.6359, - "step": 123200 - }, - { - "epoch": 1.0892165703071128, - "grad_norm": 2.5578250885009766, - "learning_rate": 3.184639049488145e-05, - "loss": 0.7334, - "step": 123210 - }, - { - "epoch": 1.0893049735674252, - "grad_norm": 2.361954927444458, - "learning_rate": 3.184491710720958e-05, - "loss": 0.5542, - "step": 123220 - }, - { - "epoch": 1.0893933768277373, - "grad_norm": 31.11703109741211, - "learning_rate": 3.1843443719537716e-05, - "loss": 0.7469, - "step": 123230 - }, - { - "epoch": 1.0894817800880496, - "grad_norm": 6.2968430519104, - "learning_rate": 3.184197033186584e-05, - "loss": 0.6461, - "step": 123240 - }, - { - "epoch": 1.089570183348362, - "grad_norm": 2.392772912979126, - "learning_rate": 3.184049694419397e-05, - "loss": 0.56, - "step": 123250 - }, - { - "epoch": 1.089658586608674, - "grad_norm": 1.6381855010986328, - "learning_rate": 3.18390235565221e-05, - "loss": 0.6057, - "step": 123260 - }, - { - "epoch": 1.0897469898689864, - "grad_norm": 4.16072940826416, - "learning_rate": 3.183755016885023e-05, - "loss": 0.5933, - "step": 123270 - }, - { - "epoch": 1.0898353931292986, - "grad_norm": 1.7321690320968628, - "learning_rate": 3.183607678117836e-05, - "loss": 0.5009, - "step": 123280 - }, - { - "epoch": 1.089923796389611, - "grad_norm": 3.3836262226104736, - "learning_rate": 3.1834603393506486e-05, - "loss": 0.5166, - "step": 123290 - }, - { - "epoch": 1.090012199649923, - "grad_norm": 1.843558430671692, - "learning_rate": 3.1833130005834614e-05, - "loss": 0.7366, - "step": 123300 - }, - { - "epoch": 1.0901006029102354, - "grad_norm": 0.9097705483436584, - "learning_rate": 3.183165661816275e-05, - "loss": 0.5847, - "step": 123310 - }, - { - "epoch": 1.0901890061705475, - "grad_norm": 2.4408435821533203, - "learning_rate": 3.183018323049087e-05, - "loss": 0.5333, - "step": 123320 - }, - { - "epoch": 1.0902774094308598, - "grad_norm": 4.341062068939209, - "learning_rate": 3.1828709842819006e-05, - "loss": 0.5979, - "step": 123330 - }, - { - "epoch": 1.090365812691172, - "grad_norm": 2.545137643814087, - "learning_rate": 3.1827236455147134e-05, - "loss": 0.6414, - "step": 123340 - }, - { - "epoch": 1.0904542159514843, - "grad_norm": 4.5739264488220215, - "learning_rate": 3.182576306747526e-05, - "loss": 0.5754, - "step": 123350 - }, - { - "epoch": 1.0905426192117966, - "grad_norm": 1.841970682144165, - "learning_rate": 3.182428967980339e-05, - "loss": 0.6466, - "step": 123360 - }, - { - "epoch": 1.0906310224721087, - "grad_norm": 1.1298997402191162, - "learning_rate": 3.1822816292131526e-05, - "loss": 0.6019, - "step": 123370 - }, - { - "epoch": 1.090719425732421, - "grad_norm": 2.7480437755584717, - "learning_rate": 3.182134290445965e-05, - "loss": 0.6754, - "step": 123380 - }, - { - "epoch": 1.0908078289927332, - "grad_norm": 2.2227084636688232, - "learning_rate": 3.181986951678778e-05, - "loss": 0.5505, - "step": 123390 - }, - { - "epoch": 1.0908962322530456, - "grad_norm": 3.8440029621124268, - "learning_rate": 3.181839612911591e-05, - "loss": 0.5024, - "step": 123400 - }, - { - "epoch": 1.0909846355133577, - "grad_norm": 4.927218914031982, - "learning_rate": 3.181692274144404e-05, - "loss": 0.6554, - "step": 123410 - }, - { - "epoch": 1.09107303877367, - "grad_norm": 4.422795295715332, - "learning_rate": 3.181544935377217e-05, - "loss": 0.6696, - "step": 123420 - }, - { - "epoch": 1.0911614420339821, - "grad_norm": 2.428706407546997, - "learning_rate": 3.1813975966100296e-05, - "loss": 0.5874, - "step": 123430 - }, - { - "epoch": 1.0912498452942945, - "grad_norm": 4.93586540222168, - "learning_rate": 3.1812502578428425e-05, - "loss": 0.5851, - "step": 123440 - }, - { - "epoch": 1.0913382485546066, - "grad_norm": 2.1009678840637207, - "learning_rate": 3.181102919075656e-05, - "loss": 0.5244, - "step": 123450 - }, - { - "epoch": 1.091426651814919, - "grad_norm": 20.023983001708984, - "learning_rate": 3.180955580308468e-05, - "loss": 0.5989, - "step": 123460 - }, - { - "epoch": 1.0915150550752313, - "grad_norm": 1.8608654737472534, - "learning_rate": 3.180808241541282e-05, - "loss": 0.6433, - "step": 123470 - }, - { - "epoch": 1.0916034583355434, - "grad_norm": 5.683498382568359, - "learning_rate": 3.1806609027740945e-05, - "loss": 0.551, - "step": 123480 - }, - { - "epoch": 1.0916918615958557, - "grad_norm": 2.197681427001953, - "learning_rate": 3.180513564006907e-05, - "loss": 0.6296, - "step": 123490 - }, - { - "epoch": 1.0917802648561679, - "grad_norm": 2.280125379562378, - "learning_rate": 3.18036622523972e-05, - "loss": 0.5279, - "step": 123500 - }, - { - "epoch": 1.0918686681164802, - "grad_norm": 1.1667300462722778, - "learning_rate": 3.180218886472534e-05, - "loss": 0.5624, - "step": 123510 - }, - { - "epoch": 1.0919570713767923, - "grad_norm": 2.5631821155548096, - "learning_rate": 3.180071547705346e-05, - "loss": 0.4854, - "step": 123520 - }, - { - "epoch": 1.0920454746371047, - "grad_norm": 1.6595871448516846, - "learning_rate": 3.1799242089381594e-05, - "loss": 0.667, - "step": 123530 - }, - { - "epoch": 1.0921338778974168, - "grad_norm": 3.094825029373169, - "learning_rate": 3.1797768701709715e-05, - "loss": 0.689, - "step": 123540 - }, - { - "epoch": 1.0922222811577291, - "grad_norm": 1.3138394355773926, - "learning_rate": 3.179629531403785e-05, - "loss": 0.6058, - "step": 123550 - }, - { - "epoch": 1.0923106844180412, - "grad_norm": 2.099130392074585, - "learning_rate": 3.179482192636598e-05, - "loss": 0.643, - "step": 123560 - }, - { - "epoch": 1.0923990876783536, - "grad_norm": 2.628974676132202, - "learning_rate": 3.179334853869411e-05, - "loss": 0.6483, - "step": 123570 - }, - { - "epoch": 1.092487490938666, - "grad_norm": 2.490139961242676, - "learning_rate": 3.1791875151022235e-05, - "loss": 0.7011, - "step": 123580 - }, - { - "epoch": 1.092575894198978, - "grad_norm": 1.8102847337722778, - "learning_rate": 3.179040176335037e-05, - "loss": 0.5049, - "step": 123590 - }, - { - "epoch": 1.0926642974592904, - "grad_norm": 6.975493907928467, - "learning_rate": 3.178892837567849e-05, - "loss": 0.6521, - "step": 123600 - }, - { - "epoch": 1.0927527007196025, - "grad_norm": 2.1386125087738037, - "learning_rate": 3.178745498800663e-05, - "loss": 0.6642, - "step": 123610 - }, - { - "epoch": 1.0928411039799149, - "grad_norm": 8.358141899108887, - "learning_rate": 3.1785981600334755e-05, - "loss": 0.6421, - "step": 123620 - }, - { - "epoch": 1.092929507240227, - "grad_norm": 1.8538811206817627, - "learning_rate": 3.1784508212662884e-05, - "loss": 0.6678, - "step": 123630 - }, - { - "epoch": 1.0930179105005393, - "grad_norm": 2.4046263694763184, - "learning_rate": 3.178303482499101e-05, - "loss": 0.6618, - "step": 123640 - }, - { - "epoch": 1.0931063137608514, - "grad_norm": 3.560228109359741, - "learning_rate": 3.178156143731914e-05, - "loss": 0.6322, - "step": 123650 - }, - { - "epoch": 1.0931947170211638, - "grad_norm": 6.775623798370361, - "learning_rate": 3.178008804964727e-05, - "loss": 0.5787, - "step": 123660 - }, - { - "epoch": 1.093283120281476, - "grad_norm": 3.2997021675109863, - "learning_rate": 3.1778614661975404e-05, - "loss": 0.6072, - "step": 123670 - }, - { - "epoch": 1.0933715235417882, - "grad_norm": 1.1060670614242554, - "learning_rate": 3.177714127430353e-05, - "loss": 0.6452, - "step": 123680 - }, - { - "epoch": 1.0934599268021006, - "grad_norm": 1.32697594165802, - "learning_rate": 3.177566788663166e-05, - "loss": 0.6269, - "step": 123690 - }, - { - "epoch": 1.0935483300624127, - "grad_norm": 2.1969361305236816, - "learning_rate": 3.177419449895979e-05, - "loss": 0.5655, - "step": 123700 - }, - { - "epoch": 1.093636733322725, - "grad_norm": 1.4848862886428833, - "learning_rate": 3.177272111128792e-05, - "loss": 0.6188, - "step": 123710 - }, - { - "epoch": 1.0937251365830372, - "grad_norm": 4.382133483886719, - "learning_rate": 3.1771247723616046e-05, - "loss": 0.7114, - "step": 123720 - }, - { - "epoch": 1.0938135398433495, - "grad_norm": 1.4304269552230835, - "learning_rate": 3.176977433594418e-05, - "loss": 0.5503, - "step": 123730 - }, - { - "epoch": 1.0939019431036616, - "grad_norm": 0.9899899363517761, - "learning_rate": 3.176830094827231e-05, - "loss": 0.7028, - "step": 123740 - }, - { - "epoch": 1.093990346363974, - "grad_norm": 1.4632052183151245, - "learning_rate": 3.176682756060044e-05, - "loss": 0.6318, - "step": 123750 - }, - { - "epoch": 1.094078749624286, - "grad_norm": 1.8045881986618042, - "learning_rate": 3.1765354172928566e-05, - "loss": 0.6597, - "step": 123760 - }, - { - "epoch": 1.0941671528845984, - "grad_norm": 3.3151791095733643, - "learning_rate": 3.1763880785256694e-05, - "loss": 0.484, - "step": 123770 - }, - { - "epoch": 1.0942555561449105, - "grad_norm": 3.7142739295959473, - "learning_rate": 3.176240739758482e-05, - "loss": 0.5963, - "step": 123780 - }, - { - "epoch": 1.0943439594052229, - "grad_norm": 13.858745574951172, - "learning_rate": 3.176093400991295e-05, - "loss": 0.5214, - "step": 123790 - }, - { - "epoch": 1.094432362665535, - "grad_norm": 1.7841321229934692, - "learning_rate": 3.1759460622241086e-05, - "loss": 0.501, - "step": 123800 - }, - { - "epoch": 1.0945207659258473, - "grad_norm": 1.2429823875427246, - "learning_rate": 3.1757987234569215e-05, - "loss": 0.6267, - "step": 123810 - }, - { - "epoch": 1.0946091691861595, - "grad_norm": 1.8305622339248657, - "learning_rate": 3.175651384689734e-05, - "loss": 0.8363, - "step": 123820 - }, - { - "epoch": 1.0946975724464718, - "grad_norm": 2.0226359367370605, - "learning_rate": 3.175504045922547e-05, - "loss": 0.6559, - "step": 123830 - }, - { - "epoch": 1.0947859757067842, - "grad_norm": 3.287172555923462, - "learning_rate": 3.17535670715536e-05, - "loss": 0.6691, - "step": 123840 - }, - { - "epoch": 1.0948743789670963, - "grad_norm": 2.5246734619140625, - "learning_rate": 3.175209368388173e-05, - "loss": 0.6421, - "step": 123850 - }, - { - "epoch": 1.0949627822274086, - "grad_norm": 5.257667064666748, - "learning_rate": 3.175062029620986e-05, - "loss": 0.6591, - "step": 123860 - }, - { - "epoch": 1.0950511854877207, - "grad_norm": 3.2301576137542725, - "learning_rate": 3.174914690853799e-05, - "loss": 0.4681, - "step": 123870 - }, - { - "epoch": 1.095139588748033, - "grad_norm": 4.241169452667236, - "learning_rate": 3.174767352086612e-05, - "loss": 0.6829, - "step": 123880 - }, - { - "epoch": 1.0952279920083452, - "grad_norm": 1.628957748413086, - "learning_rate": 3.174620013319425e-05, - "loss": 0.6162, - "step": 123890 - }, - { - "epoch": 1.0953163952686575, - "grad_norm": 3.2245240211486816, - "learning_rate": 3.1744726745522376e-05, - "loss": 0.622, - "step": 123900 - }, - { - "epoch": 1.0954047985289697, - "grad_norm": 1.9790922403335571, - "learning_rate": 3.1743253357850505e-05, - "loss": 0.6179, - "step": 123910 - }, - { - "epoch": 1.095493201789282, - "grad_norm": 2.3553946018218994, - "learning_rate": 3.174177997017864e-05, - "loss": 0.709, - "step": 123920 - }, - { - "epoch": 1.0955816050495941, - "grad_norm": 2.2758944034576416, - "learning_rate": 3.174030658250676e-05, - "loss": 0.5286, - "step": 123930 - }, - { - "epoch": 1.0956700083099065, - "grad_norm": 2.614903211593628, - "learning_rate": 3.17388331948349e-05, - "loss": 0.5457, - "step": 123940 - }, - { - "epoch": 1.0957584115702188, - "grad_norm": 2.2489240169525146, - "learning_rate": 3.1737359807163025e-05, - "loss": 0.5752, - "step": 123950 - }, - { - "epoch": 1.095846814830531, - "grad_norm": 3.6554696559906006, - "learning_rate": 3.1735886419491153e-05, - "loss": 0.6931, - "step": 123960 - }, - { - "epoch": 1.0959352180908433, - "grad_norm": 2.23058819770813, - "learning_rate": 3.173441303181928e-05, - "loss": 0.6354, - "step": 123970 - }, - { - "epoch": 1.0960236213511554, - "grad_norm": 2.3187522888183594, - "learning_rate": 3.173293964414742e-05, - "loss": 0.5745, - "step": 123980 - }, - { - "epoch": 1.0961120246114677, - "grad_norm": 2.40840744972229, - "learning_rate": 3.173146625647554e-05, - "loss": 0.592, - "step": 123990 - }, - { - "epoch": 1.0962004278717798, - "grad_norm": 2.2621009349823, - "learning_rate": 3.1729992868803674e-05, - "loss": 0.6942, - "step": 124000 - }, - { - "epoch": 1.0962888311320922, - "grad_norm": 3.0730385780334473, - "learning_rate": 3.1728519481131795e-05, - "loss": 0.61, - "step": 124010 - }, - { - "epoch": 1.0963772343924043, - "grad_norm": 5.8164777755737305, - "learning_rate": 3.172704609345993e-05, - "loss": 0.5204, - "step": 124020 - }, - { - "epoch": 1.0964656376527167, - "grad_norm": 2.794093370437622, - "learning_rate": 3.172557270578806e-05, - "loss": 0.5585, - "step": 124030 - }, - { - "epoch": 1.0965540409130288, - "grad_norm": 1.3437511920928955, - "learning_rate": 3.172409931811619e-05, - "loss": 0.4742, - "step": 124040 - }, - { - "epoch": 1.0966424441733411, - "grad_norm": 3.45401668548584, - "learning_rate": 3.1722625930444315e-05, - "loss": 0.5125, - "step": 124050 - }, - { - "epoch": 1.0967308474336535, - "grad_norm": 6.227214336395264, - "learning_rate": 3.172115254277245e-05, - "loss": 0.6378, - "step": 124060 - }, - { - "epoch": 1.0968192506939656, - "grad_norm": 4.764873027801514, - "learning_rate": 3.171967915510057e-05, - "loss": 0.6519, - "step": 124070 - }, - { - "epoch": 1.096907653954278, - "grad_norm": 1.779847502708435, - "learning_rate": 3.171820576742871e-05, - "loss": 0.528, - "step": 124080 - }, - { - "epoch": 1.09699605721459, - "grad_norm": 6.919624328613281, - "learning_rate": 3.1716732379756836e-05, - "loss": 0.5687, - "step": 124090 - }, - { - "epoch": 1.0970844604749024, - "grad_norm": 8.186634063720703, - "learning_rate": 3.1715258992084964e-05, - "loss": 0.7371, - "step": 124100 - }, - { - "epoch": 1.0971728637352145, - "grad_norm": 5.432801246643066, - "learning_rate": 3.171378560441309e-05, - "loss": 0.727, - "step": 124110 - }, - { - "epoch": 1.0972612669955268, - "grad_norm": 4.025297164916992, - "learning_rate": 3.171231221674122e-05, - "loss": 0.6859, - "step": 124120 - }, - { - "epoch": 1.097349670255839, - "grad_norm": 10.788854598999023, - "learning_rate": 3.171083882906935e-05, - "loss": 0.5939, - "step": 124130 - }, - { - "epoch": 1.0974380735161513, - "grad_norm": 8.757100105285645, - "learning_rate": 3.1709365441397484e-05, - "loss": 0.5978, - "step": 124140 - }, - { - "epoch": 1.0975264767764634, - "grad_norm": 1.8570926189422607, - "learning_rate": 3.1707892053725606e-05, - "loss": 0.5262, - "step": 124150 - }, - { - "epoch": 1.0976148800367758, - "grad_norm": 1.9620702266693115, - "learning_rate": 3.170641866605374e-05, - "loss": 0.6535, - "step": 124160 - }, - { - "epoch": 1.097703283297088, - "grad_norm": 2.4935414791107178, - "learning_rate": 3.170494527838187e-05, - "loss": 0.6419, - "step": 124170 - }, - { - "epoch": 1.0977916865574002, - "grad_norm": 2.49300217628479, - "learning_rate": 3.170347189071e-05, - "loss": 0.5589, - "step": 124180 - }, - { - "epoch": 1.0978800898177126, - "grad_norm": 8.12836742401123, - "learning_rate": 3.1701998503038126e-05, - "loss": 0.6959, - "step": 124190 - }, - { - "epoch": 1.0979684930780247, - "grad_norm": 3.762848377227783, - "learning_rate": 3.170052511536626e-05, - "loss": 0.6687, - "step": 124200 - }, - { - "epoch": 1.098056896338337, - "grad_norm": 2.8207998275756836, - "learning_rate": 3.169905172769438e-05, - "loss": 0.5983, - "step": 124210 - }, - { - "epoch": 1.0981452995986491, - "grad_norm": 2.1519622802734375, - "learning_rate": 3.169757834002252e-05, - "loss": 0.5559, - "step": 124220 - }, - { - "epoch": 1.0982337028589615, - "grad_norm": 3.45025897026062, - "learning_rate": 3.169610495235064e-05, - "loss": 0.4949, - "step": 124230 - }, - { - "epoch": 1.0983221061192736, - "grad_norm": 2.2874369621276855, - "learning_rate": 3.1694631564678774e-05, - "loss": 0.5615, - "step": 124240 - }, - { - "epoch": 1.098410509379586, - "grad_norm": 0.9942915439605713, - "learning_rate": 3.16931581770069e-05, - "loss": 0.5601, - "step": 124250 - }, - { - "epoch": 1.098498912639898, - "grad_norm": 1.8301376104354858, - "learning_rate": 3.169168478933503e-05, - "loss": 0.6039, - "step": 124260 - }, - { - "epoch": 1.0985873159002104, - "grad_norm": 1.2993732690811157, - "learning_rate": 3.169021140166316e-05, - "loss": 0.6644, - "step": 124270 - }, - { - "epoch": 1.0986757191605228, - "grad_norm": 3.9826393127441406, - "learning_rate": 3.1688738013991295e-05, - "loss": 0.6299, - "step": 124280 - }, - { - "epoch": 1.0987641224208349, - "grad_norm": 5.105567932128906, - "learning_rate": 3.1687264626319416e-05, - "loss": 0.7223, - "step": 124290 - }, - { - "epoch": 1.0988525256811472, - "grad_norm": 3.4585683345794678, - "learning_rate": 3.168579123864755e-05, - "loss": 0.5159, - "step": 124300 - }, - { - "epoch": 1.0989409289414593, - "grad_norm": 2.7868947982788086, - "learning_rate": 3.168431785097568e-05, - "loss": 0.61, - "step": 124310 - }, - { - "epoch": 1.0990293322017717, - "grad_norm": 2.120448589324951, - "learning_rate": 3.168284446330381e-05, - "loss": 0.579, - "step": 124320 - }, - { - "epoch": 1.0991177354620838, - "grad_norm": 6.469594478607178, - "learning_rate": 3.1681371075631936e-05, - "loss": 0.5175, - "step": 124330 - }, - { - "epoch": 1.0992061387223961, - "grad_norm": 4.462221145629883, - "learning_rate": 3.167989768796007e-05, - "loss": 0.6313, - "step": 124340 - }, - { - "epoch": 1.0992945419827083, - "grad_norm": 2.2095582485198975, - "learning_rate": 3.167842430028819e-05, - "loss": 0.6948, - "step": 124350 - }, - { - "epoch": 1.0993829452430206, - "grad_norm": 1.5518501996994019, - "learning_rate": 3.167695091261633e-05, - "loss": 0.6219, - "step": 124360 - }, - { - "epoch": 1.0994713485033327, - "grad_norm": 2.8392245769500732, - "learning_rate": 3.167547752494445e-05, - "loss": 0.4992, - "step": 124370 - }, - { - "epoch": 1.099559751763645, - "grad_norm": 2.7968928813934326, - "learning_rate": 3.1674004137272585e-05, - "loss": 0.5455, - "step": 124380 - }, - { - "epoch": 1.0996481550239572, - "grad_norm": 2.4937191009521484, - "learning_rate": 3.167253074960071e-05, - "loss": 0.5717, - "step": 124390 - }, - { - "epoch": 1.0997365582842695, - "grad_norm": 1.6564760208129883, - "learning_rate": 3.167105736192884e-05, - "loss": 0.7106, - "step": 124400 - }, - { - "epoch": 1.0998249615445816, - "grad_norm": 1.3187408447265625, - "learning_rate": 3.166958397425697e-05, - "loss": 0.5031, - "step": 124410 - }, - { - "epoch": 1.099913364804894, - "grad_norm": 0.8784456253051758, - "learning_rate": 3.1668110586585105e-05, - "loss": 0.5915, - "step": 124420 - }, - { - "epoch": 1.1000017680652063, - "grad_norm": 1.3571583032608032, - "learning_rate": 3.166663719891323e-05, - "loss": 0.5861, - "step": 124430 - }, - { - "epoch": 1.1000901713255185, - "grad_norm": 1.1783332824707031, - "learning_rate": 3.166516381124136e-05, - "loss": 0.6347, - "step": 124440 - }, - { - "epoch": 1.1001785745858308, - "grad_norm": 1.5184040069580078, - "learning_rate": 3.166369042356949e-05, - "loss": 0.6243, - "step": 124450 - }, - { - "epoch": 1.100266977846143, - "grad_norm": 0.8016341924667358, - "learning_rate": 3.166221703589762e-05, - "loss": 0.4972, - "step": 124460 - }, - { - "epoch": 1.1003553811064553, - "grad_norm": 2.7434635162353516, - "learning_rate": 3.166074364822575e-05, - "loss": 0.6019, - "step": 124470 - }, - { - "epoch": 1.1004437843667674, - "grad_norm": 15.655183792114258, - "learning_rate": 3.1659270260553875e-05, - "loss": 0.7312, - "step": 124480 - }, - { - "epoch": 1.1005321876270797, - "grad_norm": 5.6018147468566895, - "learning_rate": 3.1657796872882004e-05, - "loss": 0.5543, - "step": 124490 - }, - { - "epoch": 1.1006205908873918, - "grad_norm": 18.7354679107666, - "learning_rate": 3.165632348521014e-05, - "loss": 0.7445, - "step": 124500 - }, - { - "epoch": 1.1007089941477042, - "grad_norm": 3.9416720867156982, - "learning_rate": 3.165485009753826e-05, - "loss": 0.746, - "step": 124510 - }, - { - "epoch": 1.1007973974080163, - "grad_norm": 2.296971321105957, - "learning_rate": 3.1653376709866395e-05, - "loss": 0.5416, - "step": 124520 - }, - { - "epoch": 1.1008858006683286, - "grad_norm": 2.4656710624694824, - "learning_rate": 3.1651903322194524e-05, - "loss": 0.674, - "step": 124530 - }, - { - "epoch": 1.100974203928641, - "grad_norm": 5.1056647300720215, - "learning_rate": 3.165042993452265e-05, - "loss": 0.5288, - "step": 124540 - }, - { - "epoch": 1.101062607188953, - "grad_norm": 2.2869575023651123, - "learning_rate": 3.164895654685078e-05, - "loss": 0.4813, - "step": 124550 - }, - { - "epoch": 1.1011510104492654, - "grad_norm": 1.507654070854187, - "learning_rate": 3.1647483159178916e-05, - "loss": 0.6334, - "step": 124560 - }, - { - "epoch": 1.1012394137095776, - "grad_norm": 7.412845134735107, - "learning_rate": 3.164600977150704e-05, - "loss": 0.5261, - "step": 124570 - }, - { - "epoch": 1.10132781696989, - "grad_norm": 21.285717010498047, - "learning_rate": 3.164453638383517e-05, - "loss": 0.5806, - "step": 124580 - }, - { - "epoch": 1.101416220230202, - "grad_norm": 0.9926066994667053, - "learning_rate": 3.16430629961633e-05, - "loss": 0.5848, - "step": 124590 - }, - { - "epoch": 1.1015046234905144, - "grad_norm": 2.340979814529419, - "learning_rate": 3.164158960849143e-05, - "loss": 0.5419, - "step": 124600 - }, - { - "epoch": 1.1015930267508265, - "grad_norm": 18.688566207885742, - "learning_rate": 3.164011622081956e-05, - "loss": 0.4664, - "step": 124610 - }, - { - "epoch": 1.1016814300111388, - "grad_norm": 1.94839346408844, - "learning_rate": 3.1638642833147686e-05, - "loss": 0.593, - "step": 124620 - }, - { - "epoch": 1.101769833271451, - "grad_norm": 1.2230571508407593, - "learning_rate": 3.1637169445475814e-05, - "loss": 0.5186, - "step": 124630 - }, - { - "epoch": 1.1018582365317633, - "grad_norm": 5.989471912384033, - "learning_rate": 3.163569605780395e-05, - "loss": 0.693, - "step": 124640 - }, - { - "epoch": 1.1019466397920756, - "grad_norm": 4.454293727874756, - "learning_rate": 3.163422267013208e-05, - "loss": 0.6537, - "step": 124650 - }, - { - "epoch": 1.1020350430523878, - "grad_norm": 3.478476047515869, - "learning_rate": 3.1632749282460206e-05, - "loss": 0.6131, - "step": 124660 - }, - { - "epoch": 1.1021234463127, - "grad_norm": 3.6605007648468018, - "learning_rate": 3.1631275894788334e-05, - "loss": 0.5098, - "step": 124670 - }, - { - "epoch": 1.1022118495730122, - "grad_norm": 1.8386131525039673, - "learning_rate": 3.162980250711646e-05, - "loss": 0.5756, - "step": 124680 - }, - { - "epoch": 1.1023002528333246, - "grad_norm": 9.639357566833496, - "learning_rate": 3.162832911944459e-05, - "loss": 0.6886, - "step": 124690 - }, - { - "epoch": 1.1023886560936367, - "grad_norm": 3.2937099933624268, - "learning_rate": 3.162685573177272e-05, - "loss": 0.6433, - "step": 124700 - }, - { - "epoch": 1.102477059353949, - "grad_norm": 1.8968168497085571, - "learning_rate": 3.1625382344100854e-05, - "loss": 0.5272, - "step": 124710 - }, - { - "epoch": 1.1025654626142611, - "grad_norm": 2.356100559234619, - "learning_rate": 3.162390895642898e-05, - "loss": 0.6988, - "step": 124720 - }, - { - "epoch": 1.1026538658745735, - "grad_norm": 2.712435722351074, - "learning_rate": 3.162243556875711e-05, - "loss": 0.6932, - "step": 124730 - }, - { - "epoch": 1.1027422691348856, - "grad_norm": 5.530733108520508, - "learning_rate": 3.162096218108524e-05, - "loss": 0.5679, - "step": 124740 - }, - { - "epoch": 1.102830672395198, - "grad_norm": 1.4024028778076172, - "learning_rate": 3.161948879341337e-05, - "loss": 0.5861, - "step": 124750 - }, - { - "epoch": 1.1029190756555103, - "grad_norm": 12.713836669921875, - "learning_rate": 3.1618015405741496e-05, - "loss": 0.7116, - "step": 124760 - }, - { - "epoch": 1.1030074789158224, - "grad_norm": 3.951727867126465, - "learning_rate": 3.161654201806963e-05, - "loss": 0.5621, - "step": 124770 - }, - { - "epoch": 1.1030958821761347, - "grad_norm": 4.476200103759766, - "learning_rate": 3.161506863039776e-05, - "loss": 0.5257, - "step": 124780 - }, - { - "epoch": 1.1031842854364469, - "grad_norm": 4.1308979988098145, - "learning_rate": 3.161359524272589e-05, - "loss": 0.5562, - "step": 124790 - }, - { - "epoch": 1.1032726886967592, - "grad_norm": 7.531303882598877, - "learning_rate": 3.1612121855054016e-05, - "loss": 0.6974, - "step": 124800 - }, - { - "epoch": 1.1033610919570713, - "grad_norm": 2.629441499710083, - "learning_rate": 3.1610648467382145e-05, - "loss": 0.8278, - "step": 124810 - }, - { - "epoch": 1.1034494952173837, - "grad_norm": 1.5622435808181763, - "learning_rate": 3.160917507971027e-05, - "loss": 0.6904, - "step": 124820 - }, - { - "epoch": 1.1035378984776958, - "grad_norm": 2.1056606769561768, - "learning_rate": 3.160770169203841e-05, - "loss": 0.5995, - "step": 124830 - }, - { - "epoch": 1.1036263017380081, - "grad_norm": 5.661498546600342, - "learning_rate": 3.160622830436653e-05, - "loss": 0.6521, - "step": 124840 - }, - { - "epoch": 1.1037147049983203, - "grad_norm": 5.302452564239502, - "learning_rate": 3.1604754916694665e-05, - "loss": 0.575, - "step": 124850 - }, - { - "epoch": 1.1038031082586326, - "grad_norm": 3.152299165725708, - "learning_rate": 3.160328152902279e-05, - "loss": 0.6587, - "step": 124860 - }, - { - "epoch": 1.103891511518945, - "grad_norm": 5.2222490310668945, - "learning_rate": 3.160180814135092e-05, - "loss": 0.626, - "step": 124870 - }, - { - "epoch": 1.103979914779257, - "grad_norm": 4.738489151000977, - "learning_rate": 3.160033475367905e-05, - "loss": 0.6006, - "step": 124880 - }, - { - "epoch": 1.1040683180395694, - "grad_norm": 6.9758734703063965, - "learning_rate": 3.1598861366007185e-05, - "loss": 0.6588, - "step": 124890 - }, - { - "epoch": 1.1041567212998815, - "grad_norm": 4.444128036499023, - "learning_rate": 3.159738797833531e-05, - "loss": 0.51, - "step": 124900 - }, - { - "epoch": 1.1042451245601939, - "grad_norm": 2.0373830795288086, - "learning_rate": 3.159591459066344e-05, - "loss": 0.5792, - "step": 124910 - }, - { - "epoch": 1.104333527820506, - "grad_norm": 3.0136606693267822, - "learning_rate": 3.159444120299157e-05, - "loss": 0.4443, - "step": 124920 - }, - { - "epoch": 1.1044219310808183, - "grad_norm": 4.708420276641846, - "learning_rate": 3.15929678153197e-05, - "loss": 0.6843, - "step": 124930 - }, - { - "epoch": 1.1045103343411304, - "grad_norm": 6.26500129699707, - "learning_rate": 3.159149442764783e-05, - "loss": 0.5731, - "step": 124940 - }, - { - "epoch": 1.1045987376014428, - "grad_norm": 6.592031955718994, - "learning_rate": 3.1590021039975955e-05, - "loss": 0.7107, - "step": 124950 - }, - { - "epoch": 1.104687140861755, - "grad_norm": 3.4746623039245605, - "learning_rate": 3.1588547652304084e-05, - "loss": 0.6429, - "step": 124960 - }, - { - "epoch": 1.1047755441220672, - "grad_norm": 4.6872782707214355, - "learning_rate": 3.158707426463222e-05, - "loss": 0.735, - "step": 124970 - }, - { - "epoch": 1.1048639473823794, - "grad_norm": 1.2400972843170166, - "learning_rate": 3.158560087696034e-05, - "loss": 0.5657, - "step": 124980 - }, - { - "epoch": 1.1049523506426917, - "grad_norm": 7.700628757476807, - "learning_rate": 3.1584127489288475e-05, - "loss": 0.6607, - "step": 124990 - }, - { - "epoch": 1.1050407539030038, - "grad_norm": 6.96332311630249, - "learning_rate": 3.1582654101616604e-05, - "loss": 0.733, - "step": 125000 - }, - { - "epoch": 1.1051291571633162, - "grad_norm": 2.716226100921631, - "learning_rate": 3.158118071394473e-05, - "loss": 0.6108, - "step": 125010 - }, - { - "epoch": 1.1052175604236285, - "grad_norm": 1.9164835214614868, - "learning_rate": 3.157970732627286e-05, - "loss": 0.4986, - "step": 125020 - }, - { - "epoch": 1.1053059636839406, - "grad_norm": 5.984572410583496, - "learning_rate": 3.1578233938600996e-05, - "loss": 0.5988, - "step": 125030 - }, - { - "epoch": 1.105394366944253, - "grad_norm": 9.276653289794922, - "learning_rate": 3.157676055092912e-05, - "loss": 0.7256, - "step": 125040 - }, - { - "epoch": 1.105482770204565, - "grad_norm": 4.372081756591797, - "learning_rate": 3.157528716325725e-05, - "loss": 0.6523, - "step": 125050 - }, - { - "epoch": 1.1055711734648774, - "grad_norm": 4.581477165222168, - "learning_rate": 3.1573813775585374e-05, - "loss": 0.6441, - "step": 125060 - }, - { - "epoch": 1.1056595767251896, - "grad_norm": 1.8464874029159546, - "learning_rate": 3.157234038791351e-05, - "loss": 0.5518, - "step": 125070 - }, - { - "epoch": 1.105747979985502, - "grad_norm": 1.789445161819458, - "learning_rate": 3.157086700024164e-05, - "loss": 0.6388, - "step": 125080 - }, - { - "epoch": 1.105836383245814, - "grad_norm": 2.8823423385620117, - "learning_rate": 3.1569393612569766e-05, - "loss": 0.6962, - "step": 125090 - }, - { - "epoch": 1.1059247865061264, - "grad_norm": 2.493934392929077, - "learning_rate": 3.1567920224897894e-05, - "loss": 0.5916, - "step": 125100 - }, - { - "epoch": 1.1060131897664385, - "grad_norm": 2.5145621299743652, - "learning_rate": 3.156644683722603e-05, - "loss": 0.5304, - "step": 125110 - }, - { - "epoch": 1.1061015930267508, - "grad_norm": 2.5019047260284424, - "learning_rate": 3.156497344955415e-05, - "loss": 0.5764, - "step": 125120 - }, - { - "epoch": 1.1061899962870632, - "grad_norm": 7.4171319007873535, - "learning_rate": 3.1563500061882286e-05, - "loss": 0.5856, - "step": 125130 - }, - { - "epoch": 1.1062783995473753, - "grad_norm": 3.224748373031616, - "learning_rate": 3.1562026674210414e-05, - "loss": 0.5969, - "step": 125140 - }, - { - "epoch": 1.1063668028076876, - "grad_norm": 1.4299430847167969, - "learning_rate": 3.156055328653854e-05, - "loss": 0.618, - "step": 125150 - }, - { - "epoch": 1.1064552060679997, - "grad_norm": 23.758073806762695, - "learning_rate": 3.155907989886667e-05, - "loss": 0.6106, - "step": 125160 - }, - { - "epoch": 1.106543609328312, - "grad_norm": 3.710561513900757, - "learning_rate": 3.15576065111948e-05, - "loss": 0.635, - "step": 125170 - }, - { - "epoch": 1.1066320125886242, - "grad_norm": 2.1364290714263916, - "learning_rate": 3.155613312352293e-05, - "loss": 0.5749, - "step": 125180 - }, - { - "epoch": 1.1067204158489365, - "grad_norm": 3.6300723552703857, - "learning_rate": 3.155465973585106e-05, - "loss": 0.5514, - "step": 125190 - }, - { - "epoch": 1.1068088191092487, - "grad_norm": 1.935792088508606, - "learning_rate": 3.1553186348179184e-05, - "loss": 0.4761, - "step": 125200 - }, - { - "epoch": 1.106897222369561, - "grad_norm": 2.8274099826812744, - "learning_rate": 3.155171296050732e-05, - "loss": 0.6517, - "step": 125210 - }, - { - "epoch": 1.1069856256298731, - "grad_norm": 1.3117761611938477, - "learning_rate": 3.155023957283545e-05, - "loss": 0.5541, - "step": 125220 - }, - { - "epoch": 1.1070740288901855, - "grad_norm": 1.588179111480713, - "learning_rate": 3.1548766185163576e-05, - "loss": 0.6889, - "step": 125230 - }, - { - "epoch": 1.1071624321504978, - "grad_norm": 1.6698071956634521, - "learning_rate": 3.1547292797491705e-05, - "loss": 0.5234, - "step": 125240 - }, - { - "epoch": 1.10725083541081, - "grad_norm": 1.8297425508499146, - "learning_rate": 3.154581940981984e-05, - "loss": 0.5914, - "step": 125250 - }, - { - "epoch": 1.1073392386711223, - "grad_norm": 3.630329132080078, - "learning_rate": 3.154434602214796e-05, - "loss": 0.7428, - "step": 125260 - }, - { - "epoch": 1.1074276419314344, - "grad_norm": 2.250415086746216, - "learning_rate": 3.1542872634476097e-05, - "loss": 0.5794, - "step": 125270 - }, - { - "epoch": 1.1075160451917467, - "grad_norm": 1.8166521787643433, - "learning_rate": 3.1541399246804225e-05, - "loss": 0.5565, - "step": 125280 - }, - { - "epoch": 1.1076044484520589, - "grad_norm": 2.053131341934204, - "learning_rate": 3.153992585913235e-05, - "loss": 0.5952, - "step": 125290 - }, - { - "epoch": 1.1076928517123712, - "grad_norm": 3.1434459686279297, - "learning_rate": 3.153845247146048e-05, - "loss": 0.6141, - "step": 125300 - }, - { - "epoch": 1.1077812549726833, - "grad_norm": 1.8760188817977905, - "learning_rate": 3.153697908378861e-05, - "loss": 0.6268, - "step": 125310 - }, - { - "epoch": 1.1078696582329957, - "grad_norm": 2.238780975341797, - "learning_rate": 3.153550569611674e-05, - "loss": 0.6903, - "step": 125320 - }, - { - "epoch": 1.1079580614933078, - "grad_norm": 0.7598658204078674, - "learning_rate": 3.1534032308444873e-05, - "loss": 0.6129, - "step": 125330 - }, - { - "epoch": 1.1080464647536201, - "grad_norm": 6.6462273597717285, - "learning_rate": 3.1532558920772995e-05, - "loss": 0.5791, - "step": 125340 - }, - { - "epoch": 1.1081348680139325, - "grad_norm": 5.928302764892578, - "learning_rate": 3.153108553310113e-05, - "loss": 0.5833, - "step": 125350 - }, - { - "epoch": 1.1082232712742446, - "grad_norm": 8.749913215637207, - "learning_rate": 3.152961214542926e-05, - "loss": 0.789, - "step": 125360 - }, - { - "epoch": 1.108311674534557, - "grad_norm": 3.7648115158081055, - "learning_rate": 3.152813875775739e-05, - "loss": 0.6107, - "step": 125370 - }, - { - "epoch": 1.108400077794869, - "grad_norm": 4.305647850036621, - "learning_rate": 3.1526665370085515e-05, - "loss": 0.6218, - "step": 125380 - }, - { - "epoch": 1.1084884810551814, - "grad_norm": 1.4181492328643799, - "learning_rate": 3.152519198241365e-05, - "loss": 0.6031, - "step": 125390 - }, - { - "epoch": 1.1085768843154935, - "grad_norm": 1.7940001487731934, - "learning_rate": 3.152371859474177e-05, - "loss": 0.5557, - "step": 125400 - }, - { - "epoch": 1.1086652875758058, - "grad_norm": 1.1032383441925049, - "learning_rate": 3.152224520706991e-05, - "loss": 0.629, - "step": 125410 - }, - { - "epoch": 1.108753690836118, - "grad_norm": 2.5060174465179443, - "learning_rate": 3.152077181939803e-05, - "loss": 0.5312, - "step": 125420 - }, - { - "epoch": 1.1088420940964303, - "grad_norm": 1.2104655504226685, - "learning_rate": 3.1519298431726164e-05, - "loss": 0.5527, - "step": 125430 - }, - { - "epoch": 1.1089304973567424, - "grad_norm": 5.633395195007324, - "learning_rate": 3.151782504405429e-05, - "loss": 0.5279, - "step": 125440 - }, - { - "epoch": 1.1090189006170548, - "grad_norm": 3.9315385818481445, - "learning_rate": 3.151635165638242e-05, - "loss": 0.546, - "step": 125450 - }, - { - "epoch": 1.1091073038773671, - "grad_norm": 1.255246877670288, - "learning_rate": 3.151487826871055e-05, - "loss": 0.5867, - "step": 125460 - }, - { - "epoch": 1.1091957071376792, - "grad_norm": 1.3848497867584229, - "learning_rate": 3.1513404881038684e-05, - "loss": 0.5623, - "step": 125470 - }, - { - "epoch": 1.1092841103979916, - "grad_norm": 3.3597869873046875, - "learning_rate": 3.1511931493366805e-05, - "loss": 0.6361, - "step": 125480 - }, - { - "epoch": 1.1093725136583037, - "grad_norm": 3.975618362426758, - "learning_rate": 3.151045810569494e-05, - "loss": 0.5909, - "step": 125490 - }, - { - "epoch": 1.109460916918616, - "grad_norm": 12.802530288696289, - "learning_rate": 3.150898471802307e-05, - "loss": 0.5648, - "step": 125500 - }, - { - "epoch": 1.1095493201789282, - "grad_norm": 2.30096435546875, - "learning_rate": 3.15075113303512e-05, - "loss": 0.5542, - "step": 125510 - }, - { - "epoch": 1.1096377234392405, - "grad_norm": 8.658194541931152, - "learning_rate": 3.1506037942679326e-05, - "loss": 0.7332, - "step": 125520 - }, - { - "epoch": 1.1097261266995526, - "grad_norm": 13.195569038391113, - "learning_rate": 3.1504564555007454e-05, - "loss": 0.5939, - "step": 125530 - }, - { - "epoch": 1.109814529959865, - "grad_norm": 3.1111979484558105, - "learning_rate": 3.150309116733558e-05, - "loss": 0.6388, - "step": 125540 - }, - { - "epoch": 1.109902933220177, - "grad_norm": 4.646438121795654, - "learning_rate": 3.150161777966372e-05, - "loss": 0.5267, - "step": 125550 - }, - { - "epoch": 1.1099913364804894, - "grad_norm": 16.754776000976562, - "learning_rate": 3.1500144391991846e-05, - "loss": 0.604, - "step": 125560 - }, - { - "epoch": 1.1100797397408015, - "grad_norm": 2.3517396450042725, - "learning_rate": 3.1498671004319974e-05, - "loss": 0.5682, - "step": 125570 - }, - { - "epoch": 1.1101681430011139, - "grad_norm": 1.9671168327331543, - "learning_rate": 3.14971976166481e-05, - "loss": 0.6677, - "step": 125580 - }, - { - "epoch": 1.1102565462614262, - "grad_norm": 3.8956871032714844, - "learning_rate": 3.149572422897623e-05, - "loss": 0.5748, - "step": 125590 - }, - { - "epoch": 1.1103449495217383, - "grad_norm": 2.6497509479522705, - "learning_rate": 3.149425084130436e-05, - "loss": 0.5924, - "step": 125600 - }, - { - "epoch": 1.1104333527820507, - "grad_norm": 3.2031991481781006, - "learning_rate": 3.1492777453632494e-05, - "loss": 0.6475, - "step": 125610 - }, - { - "epoch": 1.1105217560423628, - "grad_norm": 4.9300336837768555, - "learning_rate": 3.149130406596062e-05, - "loss": 0.6979, - "step": 125620 - }, - { - "epoch": 1.1106101593026751, - "grad_norm": 10.219849586486816, - "learning_rate": 3.148983067828875e-05, - "loss": 0.6738, - "step": 125630 - }, - { - "epoch": 1.1106985625629873, - "grad_norm": 4.082695960998535, - "learning_rate": 3.148835729061688e-05, - "loss": 0.5604, - "step": 125640 - }, - { - "epoch": 1.1107869658232996, - "grad_norm": 1.997944712638855, - "learning_rate": 3.148688390294501e-05, - "loss": 0.6651, - "step": 125650 - }, - { - "epoch": 1.1108753690836117, - "grad_norm": 6.413634777069092, - "learning_rate": 3.1485410515273136e-05, - "loss": 0.4856, - "step": 125660 - }, - { - "epoch": 1.110963772343924, - "grad_norm": 1.8265559673309326, - "learning_rate": 3.1483937127601265e-05, - "loss": 0.5785, - "step": 125670 - }, - { - "epoch": 1.1110521756042362, - "grad_norm": 1.9175828695297241, - "learning_rate": 3.14824637399294e-05, - "loss": 0.5205, - "step": 125680 - }, - { - "epoch": 1.1111405788645485, - "grad_norm": 16.259056091308594, - "learning_rate": 3.148099035225753e-05, - "loss": 0.5791, - "step": 125690 - }, - { - "epoch": 1.1112289821248607, - "grad_norm": 7.223111152648926, - "learning_rate": 3.1479516964585656e-05, - "loss": 0.5815, - "step": 125700 - }, - { - "epoch": 1.111317385385173, - "grad_norm": 1.3366649150848389, - "learning_rate": 3.1478043576913785e-05, - "loss": 0.6147, - "step": 125710 - }, - { - "epoch": 1.1114057886454853, - "grad_norm": 3.212587356567383, - "learning_rate": 3.147657018924191e-05, - "loss": 0.7404, - "step": 125720 - }, - { - "epoch": 1.1114941919057975, - "grad_norm": 2.8107106685638428, - "learning_rate": 3.147509680157004e-05, - "loss": 0.6225, - "step": 125730 - }, - { - "epoch": 1.1115825951661098, - "grad_norm": 1.9735573530197144, - "learning_rate": 3.1473623413898177e-05, - "loss": 0.69, - "step": 125740 - }, - { - "epoch": 1.111670998426422, - "grad_norm": 1.473301887512207, - "learning_rate": 3.1472150026226305e-05, - "loss": 0.5122, - "step": 125750 - }, - { - "epoch": 1.1117594016867343, - "grad_norm": 3.5634231567382812, - "learning_rate": 3.147067663855443e-05, - "loss": 0.6575, - "step": 125760 - }, - { - "epoch": 1.1118478049470464, - "grad_norm": 2.6562602519989014, - "learning_rate": 3.146920325088256e-05, - "loss": 0.5978, - "step": 125770 - }, - { - "epoch": 1.1119362082073587, - "grad_norm": 6.751959800720215, - "learning_rate": 3.146772986321069e-05, - "loss": 0.6665, - "step": 125780 - }, - { - "epoch": 1.1120246114676708, - "grad_norm": 1.9707025289535522, - "learning_rate": 3.146625647553882e-05, - "loss": 0.6024, - "step": 125790 - }, - { - "epoch": 1.1121130147279832, - "grad_norm": 9.072181701660156, - "learning_rate": 3.1464783087866953e-05, - "loss": 0.7038, - "step": 125800 - }, - { - "epoch": 1.1122014179882953, - "grad_norm": 2.1578590869903564, - "learning_rate": 3.1463309700195075e-05, - "loss": 0.584, - "step": 125810 - }, - { - "epoch": 1.1122898212486076, - "grad_norm": 2.9197070598602295, - "learning_rate": 3.146183631252321e-05, - "loss": 0.4763, - "step": 125820 - }, - { - "epoch": 1.11237822450892, - "grad_norm": 3.019852638244629, - "learning_rate": 3.146036292485134e-05, - "loss": 0.7298, - "step": 125830 - }, - { - "epoch": 1.112466627769232, - "grad_norm": 1.4384427070617676, - "learning_rate": 3.145888953717947e-05, - "loss": 0.7194, - "step": 125840 - }, - { - "epoch": 1.1125550310295445, - "grad_norm": 3.314427614212036, - "learning_rate": 3.1457416149507595e-05, - "loss": 0.5566, - "step": 125850 - }, - { - "epoch": 1.1126434342898566, - "grad_norm": 1.7390302419662476, - "learning_rate": 3.145594276183573e-05, - "loss": 0.6769, - "step": 125860 - }, - { - "epoch": 1.112731837550169, - "grad_norm": 3.1712758541107178, - "learning_rate": 3.145446937416385e-05, - "loss": 0.5637, - "step": 125870 - }, - { - "epoch": 1.112820240810481, - "grad_norm": 3.294419527053833, - "learning_rate": 3.145299598649199e-05, - "loss": 0.633, - "step": 125880 - }, - { - "epoch": 1.1129086440707934, - "grad_norm": 3.6876602172851562, - "learning_rate": 3.145152259882011e-05, - "loss": 0.6303, - "step": 125890 - }, - { - "epoch": 1.1129970473311055, - "grad_norm": 1.4310834407806396, - "learning_rate": 3.1450049211148244e-05, - "loss": 0.7808, - "step": 125900 - }, - { - "epoch": 1.1130854505914178, - "grad_norm": 1.7381114959716797, - "learning_rate": 3.144857582347637e-05, - "loss": 0.711, - "step": 125910 - }, - { - "epoch": 1.11317385385173, - "grad_norm": 7.087277412414551, - "learning_rate": 3.14471024358045e-05, - "loss": 0.5593, - "step": 125920 - }, - { - "epoch": 1.1132622571120423, - "grad_norm": 9.185232162475586, - "learning_rate": 3.144562904813263e-05, - "loss": 0.6842, - "step": 125930 - }, - { - "epoch": 1.1133506603723546, - "grad_norm": 6.922568321228027, - "learning_rate": 3.1444155660460764e-05, - "loss": 0.732, - "step": 125940 - }, - { - "epoch": 1.1134390636326668, - "grad_norm": 2.6085684299468994, - "learning_rate": 3.1442682272788886e-05, - "loss": 0.5931, - "step": 125950 - }, - { - "epoch": 1.113527466892979, - "grad_norm": 13.312128067016602, - "learning_rate": 3.144120888511702e-05, - "loss": 0.5794, - "step": 125960 - }, - { - "epoch": 1.1136158701532912, - "grad_norm": 0.8146817088127136, - "learning_rate": 3.143973549744515e-05, - "loss": 0.6911, - "step": 125970 - }, - { - "epoch": 1.1137042734136036, - "grad_norm": 2.7889797687530518, - "learning_rate": 3.143826210977328e-05, - "loss": 0.5913, - "step": 125980 - }, - { - "epoch": 1.1137926766739157, - "grad_norm": 2.0145294666290283, - "learning_rate": 3.1436788722101406e-05, - "loss": 0.6094, - "step": 125990 - }, - { - "epoch": 1.113881079934228, - "grad_norm": 8.28034782409668, - "learning_rate": 3.1435315334429534e-05, - "loss": 0.4998, - "step": 126000 - }, - { - "epoch": 1.1139694831945401, - "grad_norm": 14.785907745361328, - "learning_rate": 3.143384194675766e-05, - "loss": 0.6574, - "step": 126010 - }, - { - "epoch": 1.1140578864548525, - "grad_norm": 2.148918628692627, - "learning_rate": 3.14323685590858e-05, - "loss": 0.508, - "step": 126020 - }, - { - "epoch": 1.1141462897151646, - "grad_norm": 2.3937630653381348, - "learning_rate": 3.143089517141392e-05, - "loss": 0.608, - "step": 126030 - }, - { - "epoch": 1.114234692975477, - "grad_norm": 5.709342956542969, - "learning_rate": 3.1429421783742054e-05, - "loss": 0.6393, - "step": 126040 - }, - { - "epoch": 1.1143230962357893, - "grad_norm": 5.312056541442871, - "learning_rate": 3.142794839607018e-05, - "loss": 0.6536, - "step": 126050 - }, - { - "epoch": 1.1144114994961014, - "grad_norm": 2.5917561054229736, - "learning_rate": 3.142647500839831e-05, - "loss": 0.5768, - "step": 126060 - }, - { - "epoch": 1.1144999027564138, - "grad_norm": 2.8331007957458496, - "learning_rate": 3.142500162072644e-05, - "loss": 0.6252, - "step": 126070 - }, - { - "epoch": 1.1145883060167259, - "grad_norm": 1.2347944974899292, - "learning_rate": 3.1423528233054574e-05, - "loss": 0.7996, - "step": 126080 - }, - { - "epoch": 1.1146767092770382, - "grad_norm": 8.965507507324219, - "learning_rate": 3.1422054845382696e-05, - "loss": 0.57, - "step": 126090 - }, - { - "epoch": 1.1147651125373503, - "grad_norm": 8.337733268737793, - "learning_rate": 3.142058145771083e-05, - "loss": 0.6087, - "step": 126100 - }, - { - "epoch": 1.1148535157976627, - "grad_norm": 1.861345887184143, - "learning_rate": 3.141910807003896e-05, - "loss": 0.5793, - "step": 126110 - }, - { - "epoch": 1.1149419190579748, - "grad_norm": 12.161194801330566, - "learning_rate": 3.141763468236709e-05, - "loss": 0.5958, - "step": 126120 - }, - { - "epoch": 1.1150303223182871, - "grad_norm": 1.917739748954773, - "learning_rate": 3.1416161294695216e-05, - "loss": 0.5545, - "step": 126130 - }, - { - "epoch": 1.1151187255785993, - "grad_norm": 2.5260403156280518, - "learning_rate": 3.1414687907023345e-05, - "loss": 0.5566, - "step": 126140 - }, - { - "epoch": 1.1152071288389116, - "grad_norm": 4.833311557769775, - "learning_rate": 3.141321451935147e-05, - "loss": 0.7167, - "step": 126150 - }, - { - "epoch": 1.1152955320992237, - "grad_norm": 2.2427449226379395, - "learning_rate": 3.141174113167961e-05, - "loss": 0.5197, - "step": 126160 - }, - { - "epoch": 1.115383935359536, - "grad_norm": 0.6076095700263977, - "learning_rate": 3.141026774400773e-05, - "loss": 0.5656, - "step": 126170 - }, - { - "epoch": 1.1154723386198484, - "grad_norm": 0.5763596296310425, - "learning_rate": 3.1408794356335865e-05, - "loss": 0.6521, - "step": 126180 - }, - { - "epoch": 1.1155607418801605, - "grad_norm": 2.0882694721221924, - "learning_rate": 3.140732096866399e-05, - "loss": 0.5895, - "step": 126190 - }, - { - "epoch": 1.1156491451404729, - "grad_norm": 2.8082191944122314, - "learning_rate": 3.140584758099212e-05, - "loss": 0.7213, - "step": 126200 - }, - { - "epoch": 1.115737548400785, - "grad_norm": 2.7710788249969482, - "learning_rate": 3.140437419332025e-05, - "loss": 0.6231, - "step": 126210 - }, - { - "epoch": 1.1158259516610973, - "grad_norm": 7.6033477783203125, - "learning_rate": 3.1402900805648385e-05, - "loss": 0.6203, - "step": 126220 - }, - { - "epoch": 1.1159143549214094, - "grad_norm": 2.884514093399048, - "learning_rate": 3.1401427417976507e-05, - "loss": 0.7096, - "step": 126230 - }, - { - "epoch": 1.1160027581817218, - "grad_norm": 1.7271358966827393, - "learning_rate": 3.139995403030464e-05, - "loss": 0.5324, - "step": 126240 - }, - { - "epoch": 1.116091161442034, - "grad_norm": 1.718198537826538, - "learning_rate": 3.139848064263276e-05, - "loss": 0.6442, - "step": 126250 - }, - { - "epoch": 1.1161795647023462, - "grad_norm": 1.9957566261291504, - "learning_rate": 3.13970072549609e-05, - "loss": 0.5736, - "step": 126260 - }, - { - "epoch": 1.1162679679626584, - "grad_norm": 5.794721603393555, - "learning_rate": 3.139553386728903e-05, - "loss": 0.7412, - "step": 126270 - }, - { - "epoch": 1.1163563712229707, - "grad_norm": 3.4659881591796875, - "learning_rate": 3.1394060479617155e-05, - "loss": 0.5739, - "step": 126280 - }, - { - "epoch": 1.1164447744832828, - "grad_norm": 5.785071849822998, - "learning_rate": 3.1392587091945283e-05, - "loss": 0.5969, - "step": 126290 - }, - { - "epoch": 1.1165331777435952, - "grad_norm": 3.6061861515045166, - "learning_rate": 3.139111370427342e-05, - "loss": 0.62, - "step": 126300 - }, - { - "epoch": 1.1166215810039075, - "grad_norm": 1.406785011291504, - "learning_rate": 3.138964031660154e-05, - "loss": 0.7136, - "step": 126310 - }, - { - "epoch": 1.1167099842642196, - "grad_norm": 2.6116490364074707, - "learning_rate": 3.1388166928929675e-05, - "loss": 0.5932, - "step": 126320 - }, - { - "epoch": 1.116798387524532, - "grad_norm": 1.9786701202392578, - "learning_rate": 3.1386693541257804e-05, - "loss": 0.734, - "step": 126330 - }, - { - "epoch": 1.116886790784844, - "grad_norm": 1.6138758659362793, - "learning_rate": 3.138522015358593e-05, - "loss": 0.5513, - "step": 126340 - }, - { - "epoch": 1.1169751940451564, - "grad_norm": 1.5725356340408325, - "learning_rate": 3.138374676591406e-05, - "loss": 0.5476, - "step": 126350 - }, - { - "epoch": 1.1170635973054686, - "grad_norm": 8.241376876831055, - "learning_rate": 3.138227337824219e-05, - "loss": 0.4881, - "step": 126360 - }, - { - "epoch": 1.117152000565781, - "grad_norm": 1.2967334985733032, - "learning_rate": 3.138079999057032e-05, - "loss": 0.5908, - "step": 126370 - }, - { - "epoch": 1.117240403826093, - "grad_norm": 2.218273401260376, - "learning_rate": 3.137932660289845e-05, - "loss": 0.6395, - "step": 126380 - }, - { - "epoch": 1.1173288070864054, - "grad_norm": 2.249335289001465, - "learning_rate": 3.1377853215226574e-05, - "loss": 0.5609, - "step": 126390 - }, - { - "epoch": 1.1174172103467175, - "grad_norm": 1.0351797342300415, - "learning_rate": 3.137637982755471e-05, - "loss": 0.5399, - "step": 126400 - }, - { - "epoch": 1.1175056136070298, - "grad_norm": 5.871886253356934, - "learning_rate": 3.137490643988284e-05, - "loss": 0.6555, - "step": 126410 - }, - { - "epoch": 1.1175940168673422, - "grad_norm": 2.120114326477051, - "learning_rate": 3.1373433052210966e-05, - "loss": 0.4715, - "step": 126420 - }, - { - "epoch": 1.1176824201276543, - "grad_norm": 2.8516831398010254, - "learning_rate": 3.1371959664539094e-05, - "loss": 0.568, - "step": 126430 - }, - { - "epoch": 1.1177708233879666, - "grad_norm": 4.082274436950684, - "learning_rate": 3.137048627686723e-05, - "loss": 0.6649, - "step": 126440 - }, - { - "epoch": 1.1178592266482787, - "grad_norm": 2.897552251815796, - "learning_rate": 3.136901288919535e-05, - "loss": 0.6458, - "step": 126450 - }, - { - "epoch": 1.117947629908591, - "grad_norm": 5.313582420349121, - "learning_rate": 3.1367539501523486e-05, - "loss": 0.626, - "step": 126460 - }, - { - "epoch": 1.1180360331689032, - "grad_norm": 1.5733404159545898, - "learning_rate": 3.1366066113851614e-05, - "loss": 0.5666, - "step": 126470 - }, - { - "epoch": 1.1181244364292156, - "grad_norm": 1.1127409934997559, - "learning_rate": 3.136459272617974e-05, - "loss": 0.5631, - "step": 126480 - }, - { - "epoch": 1.1182128396895277, - "grad_norm": 0.7725486159324646, - "learning_rate": 3.136311933850787e-05, - "loss": 0.5232, - "step": 126490 - }, - { - "epoch": 1.11830124294984, - "grad_norm": 9.888276100158691, - "learning_rate": 3.1361645950836e-05, - "loss": 0.6731, - "step": 126500 - }, - { - "epoch": 1.1183896462101521, - "grad_norm": 2.0208230018615723, - "learning_rate": 3.136017256316413e-05, - "loss": 0.6906, - "step": 126510 - }, - { - "epoch": 1.1184780494704645, - "grad_norm": 4.282047748565674, - "learning_rate": 3.135869917549226e-05, - "loss": 0.6936, - "step": 126520 - }, - { - "epoch": 1.1185664527307768, - "grad_norm": 7.722494602203369, - "learning_rate": 3.135722578782039e-05, - "loss": 0.6546, - "step": 126530 - }, - { - "epoch": 1.118654855991089, - "grad_norm": 1.8932750225067139, - "learning_rate": 3.135575240014852e-05, - "loss": 0.679, - "step": 126540 - }, - { - "epoch": 1.1187432592514013, - "grad_norm": 3.4825494289398193, - "learning_rate": 3.135427901247665e-05, - "loss": 0.5792, - "step": 126550 - }, - { - "epoch": 1.1188316625117134, - "grad_norm": 3.8179244995117188, - "learning_rate": 3.1352805624804776e-05, - "loss": 0.7298, - "step": 126560 - }, - { - "epoch": 1.1189200657720257, - "grad_norm": 11.020949363708496, - "learning_rate": 3.1351332237132904e-05, - "loss": 0.7908, - "step": 126570 - }, - { - "epoch": 1.1190084690323379, - "grad_norm": 2.8973608016967773, - "learning_rate": 3.134985884946104e-05, - "loss": 0.6241, - "step": 126580 - }, - { - "epoch": 1.1190968722926502, - "grad_norm": 15.375143051147461, - "learning_rate": 3.134838546178917e-05, - "loss": 0.6852, - "step": 126590 - }, - { - "epoch": 1.1191852755529623, - "grad_norm": 2.3051064014434814, - "learning_rate": 3.1346912074117296e-05, - "loss": 0.5439, - "step": 126600 - }, - { - "epoch": 1.1192736788132747, - "grad_norm": 1.1505138874053955, - "learning_rate": 3.1345438686445425e-05, - "loss": 0.7202, - "step": 126610 - }, - { - "epoch": 1.1193620820735868, - "grad_norm": 3.004833698272705, - "learning_rate": 3.134396529877355e-05, - "loss": 0.6213, - "step": 126620 - }, - { - "epoch": 1.1194504853338991, - "grad_norm": 5.324898719787598, - "learning_rate": 3.134249191110168e-05, - "loss": 0.512, - "step": 126630 - }, - { - "epoch": 1.1195388885942115, - "grad_norm": 1.4163658618927002, - "learning_rate": 3.134101852342981e-05, - "loss": 0.6827, - "step": 126640 - }, - { - "epoch": 1.1196272918545236, - "grad_norm": 1.3405177593231201, - "learning_rate": 3.1339545135757945e-05, - "loss": 0.69, - "step": 126650 - }, - { - "epoch": 1.119715695114836, - "grad_norm": 3.2913944721221924, - "learning_rate": 3.133807174808607e-05, - "loss": 0.7064, - "step": 126660 - }, - { - "epoch": 1.119804098375148, - "grad_norm": 0.9808671474456787, - "learning_rate": 3.13365983604142e-05, - "loss": 0.5889, - "step": 126670 - }, - { - "epoch": 1.1198925016354604, - "grad_norm": 2.1487343311309814, - "learning_rate": 3.133512497274233e-05, - "loss": 0.6175, - "step": 126680 - }, - { - "epoch": 1.1199809048957725, - "grad_norm": 2.7274739742279053, - "learning_rate": 3.133365158507046e-05, - "loss": 0.6296, - "step": 126690 - }, - { - "epoch": 1.1200693081560849, - "grad_norm": 0.8556886315345764, - "learning_rate": 3.133217819739859e-05, - "loss": 0.6017, - "step": 126700 - }, - { - "epoch": 1.120157711416397, - "grad_norm": 1.6036845445632935, - "learning_rate": 3.133070480972672e-05, - "loss": 0.6004, - "step": 126710 - }, - { - "epoch": 1.1202461146767093, - "grad_norm": 7.731258869171143, - "learning_rate": 3.132923142205484e-05, - "loss": 0.6642, - "step": 126720 - }, - { - "epoch": 1.1203345179370214, - "grad_norm": 1.877601146697998, - "learning_rate": 3.132775803438298e-05, - "loss": 0.7328, - "step": 126730 - }, - { - "epoch": 1.1204229211973338, - "grad_norm": 3.8008337020874023, - "learning_rate": 3.132628464671111e-05, - "loss": 0.6508, - "step": 126740 - }, - { - "epoch": 1.120511324457646, - "grad_norm": 2.1105246543884277, - "learning_rate": 3.1324811259039235e-05, - "loss": 0.6142, - "step": 126750 - }, - { - "epoch": 1.1205997277179582, - "grad_norm": 3.3778750896453857, - "learning_rate": 3.1323337871367364e-05, - "loss": 0.6446, - "step": 126760 - }, - { - "epoch": 1.1206881309782706, - "grad_norm": 3.2779958248138428, - "learning_rate": 3.13218644836955e-05, - "loss": 0.6537, - "step": 126770 - }, - { - "epoch": 1.1207765342385827, - "grad_norm": 6.633204936981201, - "learning_rate": 3.132039109602362e-05, - "loss": 0.5846, - "step": 126780 - }, - { - "epoch": 1.120864937498895, - "grad_norm": 10.813836097717285, - "learning_rate": 3.1318917708351755e-05, - "loss": 0.6043, - "step": 126790 - }, - { - "epoch": 1.1209533407592072, - "grad_norm": 4.237395286560059, - "learning_rate": 3.1317444320679884e-05, - "loss": 0.7342, - "step": 126800 - }, - { - "epoch": 1.1210417440195195, - "grad_norm": 4.222539901733398, - "learning_rate": 3.131597093300801e-05, - "loss": 0.5849, - "step": 126810 - }, - { - "epoch": 1.1211301472798316, - "grad_norm": 0.974381685256958, - "learning_rate": 3.131449754533614e-05, - "loss": 0.5039, - "step": 126820 - }, - { - "epoch": 1.121218550540144, - "grad_norm": 7.4011616706848145, - "learning_rate": 3.131302415766427e-05, - "loss": 0.5862, - "step": 126830 - }, - { - "epoch": 1.121306953800456, - "grad_norm": 3.8008170127868652, - "learning_rate": 3.13115507699924e-05, - "loss": 0.5368, - "step": 126840 - }, - { - "epoch": 1.1213953570607684, - "grad_norm": 4.988938808441162, - "learning_rate": 3.131007738232053e-05, - "loss": 0.5801, - "step": 126850 - }, - { - "epoch": 1.1214837603210805, - "grad_norm": 1.0936565399169922, - "learning_rate": 3.1308603994648654e-05, - "loss": 0.5127, - "step": 126860 - }, - { - "epoch": 1.121572163581393, - "grad_norm": 2.333862781524658, - "learning_rate": 3.130713060697679e-05, - "loss": 0.6937, - "step": 126870 - }, - { - "epoch": 1.121660566841705, - "grad_norm": 0.904512882232666, - "learning_rate": 3.130565721930492e-05, - "loss": 0.5786, - "step": 126880 - }, - { - "epoch": 1.1217489701020174, - "grad_norm": 2.9923555850982666, - "learning_rate": 3.1304183831633046e-05, - "loss": 0.522, - "step": 126890 - }, - { - "epoch": 1.1218373733623297, - "grad_norm": 11.812182426452637, - "learning_rate": 3.1302710443961174e-05, - "loss": 0.7455, - "step": 126900 - }, - { - "epoch": 1.1219257766226418, - "grad_norm": 2.903211832046509, - "learning_rate": 3.130123705628931e-05, - "loss": 0.5789, - "step": 126910 - }, - { - "epoch": 1.1220141798829542, - "grad_norm": 1.6600630283355713, - "learning_rate": 3.129976366861743e-05, - "loss": 0.5981, - "step": 126920 - }, - { - "epoch": 1.1221025831432663, - "grad_norm": 1.3097416162490845, - "learning_rate": 3.1298290280945566e-05, - "loss": 0.508, - "step": 126930 - }, - { - "epoch": 1.1221909864035786, - "grad_norm": 15.6178560256958, - "learning_rate": 3.129681689327369e-05, - "loss": 0.5887, - "step": 126940 - }, - { - "epoch": 1.1222793896638907, - "grad_norm": 1.5398316383361816, - "learning_rate": 3.129534350560182e-05, - "loss": 0.4815, - "step": 126950 - }, - { - "epoch": 1.122367792924203, - "grad_norm": 0.8039826154708862, - "learning_rate": 3.129387011792995e-05, - "loss": 0.5016, - "step": 126960 - }, - { - "epoch": 1.1224561961845152, - "grad_norm": 2.7974190711975098, - "learning_rate": 3.129239673025808e-05, - "loss": 0.6706, - "step": 126970 - }, - { - "epoch": 1.1225445994448275, - "grad_norm": 2.6173548698425293, - "learning_rate": 3.129092334258621e-05, - "loss": 0.8201, - "step": 126980 - }, - { - "epoch": 1.1226330027051397, - "grad_norm": 3.0918893814086914, - "learning_rate": 3.128944995491434e-05, - "loss": 0.6088, - "step": 126990 - }, - { - "epoch": 1.122721405965452, - "grad_norm": 2.8781216144561768, - "learning_rate": 3.1287976567242464e-05, - "loss": 0.5678, - "step": 127000 - }, - { - "epoch": 1.1228098092257643, - "grad_norm": 1.210296630859375, - "learning_rate": 3.12865031795706e-05, - "loss": 0.6697, - "step": 127010 - }, - { - "epoch": 1.1228982124860765, - "grad_norm": 2.7205042839050293, - "learning_rate": 3.128502979189873e-05, - "loss": 0.5873, - "step": 127020 - }, - { - "epoch": 1.1229866157463888, - "grad_norm": 1.9096770286560059, - "learning_rate": 3.1283556404226856e-05, - "loss": 0.6304, - "step": 127030 - }, - { - "epoch": 1.123075019006701, - "grad_norm": 1.5841436386108398, - "learning_rate": 3.1282083016554985e-05, - "loss": 0.6046, - "step": 127040 - }, - { - "epoch": 1.1231634222670133, - "grad_norm": 4.161255836486816, - "learning_rate": 3.128060962888312e-05, - "loss": 0.6643, - "step": 127050 - }, - { - "epoch": 1.1232518255273254, - "grad_norm": 5.838593482971191, - "learning_rate": 3.127913624121124e-05, - "loss": 0.6965, - "step": 127060 - }, - { - "epoch": 1.1233402287876377, - "grad_norm": 5.136905670166016, - "learning_rate": 3.1277662853539376e-05, - "loss": 0.6932, - "step": 127070 - }, - { - "epoch": 1.1234286320479498, - "grad_norm": 13.633052825927734, - "learning_rate": 3.12761894658675e-05, - "loss": 0.4809, - "step": 127080 - }, - { - "epoch": 1.1235170353082622, - "grad_norm": 2.246755361557007, - "learning_rate": 3.127471607819563e-05, - "loss": 0.7049, - "step": 127090 - }, - { - "epoch": 1.1236054385685743, - "grad_norm": 1.7412654161453247, - "learning_rate": 3.127324269052376e-05, - "loss": 0.6733, - "step": 127100 - }, - { - "epoch": 1.1236938418288867, - "grad_norm": 6.824997901916504, - "learning_rate": 3.127176930285189e-05, - "loss": 0.6276, - "step": 127110 - }, - { - "epoch": 1.123782245089199, - "grad_norm": 4.419140815734863, - "learning_rate": 3.127029591518002e-05, - "loss": 0.6596, - "step": 127120 - }, - { - "epoch": 1.1238706483495111, - "grad_norm": 13.504050254821777, - "learning_rate": 3.126882252750815e-05, - "loss": 0.6391, - "step": 127130 - }, - { - "epoch": 1.1239590516098235, - "grad_norm": 3.6946518421173096, - "learning_rate": 3.1267349139836275e-05, - "loss": 0.5094, - "step": 127140 - }, - { - "epoch": 1.1240474548701356, - "grad_norm": 2.123535633087158, - "learning_rate": 3.126587575216441e-05, - "loss": 0.7253, - "step": 127150 - }, - { - "epoch": 1.124135858130448, - "grad_norm": 7.622867584228516, - "learning_rate": 3.126440236449254e-05, - "loss": 0.6652, - "step": 127160 - }, - { - "epoch": 1.12422426139076, - "grad_norm": 2.0470130443573, - "learning_rate": 3.126292897682067e-05, - "loss": 0.5564, - "step": 127170 - }, - { - "epoch": 1.1243126646510724, - "grad_norm": 1.351466417312622, - "learning_rate": 3.1261455589148795e-05, - "loss": 0.5942, - "step": 127180 - }, - { - "epoch": 1.1244010679113845, - "grad_norm": 1.3055543899536133, - "learning_rate": 3.1259982201476923e-05, - "loss": 0.662, - "step": 127190 - }, - { - "epoch": 1.1244894711716968, - "grad_norm": 2.757878541946411, - "learning_rate": 3.125850881380505e-05, - "loss": 0.4989, - "step": 127200 - }, - { - "epoch": 1.124577874432009, - "grad_norm": 2.3031773567199707, - "learning_rate": 3.125703542613319e-05, - "loss": 0.6053, - "step": 127210 - }, - { - "epoch": 1.1246662776923213, - "grad_norm": 5.308856964111328, - "learning_rate": 3.125556203846131e-05, - "loss": 0.5899, - "step": 127220 - }, - { - "epoch": 1.1247546809526336, - "grad_norm": 1.7436944246292114, - "learning_rate": 3.1254088650789444e-05, - "loss": 0.6173, - "step": 127230 - }, - { - "epoch": 1.1248430842129458, - "grad_norm": 3.0537967681884766, - "learning_rate": 3.125261526311757e-05, - "loss": 0.6495, - "step": 127240 - }, - { - "epoch": 1.124931487473258, - "grad_norm": 7.111103057861328, - "learning_rate": 3.12511418754457e-05, - "loss": 0.5467, - "step": 127250 - }, - { - "epoch": 1.1250198907335702, - "grad_norm": 1.9732167720794678, - "learning_rate": 3.124966848777383e-05, - "loss": 0.6263, - "step": 127260 - }, - { - "epoch": 1.1251082939938826, - "grad_norm": 1.3944525718688965, - "learning_rate": 3.1248195100101964e-05, - "loss": 0.4412, - "step": 127270 - }, - { - "epoch": 1.1251966972541947, - "grad_norm": 1.6753472089767456, - "learning_rate": 3.1246721712430085e-05, - "loss": 0.5593, - "step": 127280 - }, - { - "epoch": 1.125285100514507, - "grad_norm": 3.7476272583007812, - "learning_rate": 3.124524832475822e-05, - "loss": 0.524, - "step": 127290 - }, - { - "epoch": 1.1253735037748192, - "grad_norm": 9.736187934875488, - "learning_rate": 3.124377493708634e-05, - "loss": 0.537, - "step": 127300 - }, - { - "epoch": 1.1254619070351315, - "grad_norm": 20.751432418823242, - "learning_rate": 3.124230154941448e-05, - "loss": 0.6067, - "step": 127310 - }, - { - "epoch": 1.1255503102954436, - "grad_norm": 3.3839869499206543, - "learning_rate": 3.1240828161742606e-05, - "loss": 0.6929, - "step": 127320 - }, - { - "epoch": 1.125638713555756, - "grad_norm": 3.6687798500061035, - "learning_rate": 3.1239354774070734e-05, - "loss": 0.5337, - "step": 127330 - }, - { - "epoch": 1.1257271168160683, - "grad_norm": 2.122966766357422, - "learning_rate": 3.123788138639886e-05, - "loss": 0.7601, - "step": 127340 - }, - { - "epoch": 1.1258155200763804, - "grad_norm": 2.464848279953003, - "learning_rate": 3.1236407998727e-05, - "loss": 0.4645, - "step": 127350 - }, - { - "epoch": 1.1259039233366925, - "grad_norm": 1.720076322555542, - "learning_rate": 3.123493461105512e-05, - "loss": 0.6076, - "step": 127360 - }, - { - "epoch": 1.1259923265970049, - "grad_norm": 1.5469862222671509, - "learning_rate": 3.1233461223383254e-05, - "loss": 0.6892, - "step": 127370 - }, - { - "epoch": 1.1260807298573172, - "grad_norm": 5.656920909881592, - "learning_rate": 3.123198783571138e-05, - "loss": 0.8294, - "step": 127380 - }, - { - "epoch": 1.1261691331176293, - "grad_norm": 2.652963638305664, - "learning_rate": 3.123051444803951e-05, - "loss": 0.6324, - "step": 127390 - }, - { - "epoch": 1.1262575363779417, - "grad_norm": 3.476199150085449, - "learning_rate": 3.122904106036764e-05, - "loss": 0.6096, - "step": 127400 - }, - { - "epoch": 1.1263459396382538, - "grad_norm": 2.9368913173675537, - "learning_rate": 3.122756767269577e-05, - "loss": 0.7013, - "step": 127410 - }, - { - "epoch": 1.1264343428985661, - "grad_norm": 1.9870730638504028, - "learning_rate": 3.1226094285023896e-05, - "loss": 0.6709, - "step": 127420 - }, - { - "epoch": 1.1265227461588783, - "grad_norm": 5.3724589347839355, - "learning_rate": 3.122462089735203e-05, - "loss": 0.7113, - "step": 127430 - }, - { - "epoch": 1.1266111494191906, - "grad_norm": 3.1432862281799316, - "learning_rate": 3.122314750968016e-05, - "loss": 0.6544, - "step": 127440 - }, - { - "epoch": 1.1266995526795027, - "grad_norm": 2.4781832695007324, - "learning_rate": 3.122167412200829e-05, - "loss": 0.6568, - "step": 127450 - }, - { - "epoch": 1.126787955939815, - "grad_norm": 3.0052313804626465, - "learning_rate": 3.1220200734336416e-05, - "loss": 0.5346, - "step": 127460 - }, - { - "epoch": 1.1268763592001272, - "grad_norm": 4.139371871948242, - "learning_rate": 3.1218727346664544e-05, - "loss": 0.7233, - "step": 127470 - }, - { - "epoch": 1.1269647624604395, - "grad_norm": 5.191629409790039, - "learning_rate": 3.121725395899267e-05, - "loss": 0.8899, - "step": 127480 - }, - { - "epoch": 1.1270531657207519, - "grad_norm": 1.8640124797821045, - "learning_rate": 3.121578057132081e-05, - "loss": 0.6398, - "step": 127490 - }, - { - "epoch": 1.127141568981064, - "grad_norm": 11.424607276916504, - "learning_rate": 3.1214307183648936e-05, - "loss": 0.6252, - "step": 127500 - }, - { - "epoch": 1.1272299722413763, - "grad_norm": 2.3524322509765625, - "learning_rate": 3.1212833795977065e-05, - "loss": 0.6719, - "step": 127510 - }, - { - "epoch": 1.1273183755016885, - "grad_norm": 5.033801555633545, - "learning_rate": 3.121136040830519e-05, - "loss": 0.6408, - "step": 127520 - }, - { - "epoch": 1.1274067787620008, - "grad_norm": 5.450212001800537, - "learning_rate": 3.120988702063332e-05, - "loss": 0.6145, - "step": 127530 - }, - { - "epoch": 1.127495182022313, - "grad_norm": 1.8799296617507935, - "learning_rate": 3.120841363296145e-05, - "loss": 0.6284, - "step": 127540 - }, - { - "epoch": 1.1275835852826253, - "grad_norm": 1.1824297904968262, - "learning_rate": 3.120694024528958e-05, - "loss": 0.5214, - "step": 127550 - }, - { - "epoch": 1.1276719885429374, - "grad_norm": 3.4856674671173096, - "learning_rate": 3.120546685761771e-05, - "loss": 0.6247, - "step": 127560 - }, - { - "epoch": 1.1277603918032497, - "grad_norm": 14.795614242553711, - "learning_rate": 3.120399346994584e-05, - "loss": 0.6804, - "step": 127570 - }, - { - "epoch": 1.1278487950635618, - "grad_norm": 2.6707940101623535, - "learning_rate": 3.120252008227397e-05, - "loss": 0.6733, - "step": 127580 - }, - { - "epoch": 1.1279371983238742, - "grad_norm": 2.520338535308838, - "learning_rate": 3.12010466946021e-05, - "loss": 0.6386, - "step": 127590 - }, - { - "epoch": 1.1280256015841865, - "grad_norm": 1.6713100671768188, - "learning_rate": 3.1199573306930227e-05, - "loss": 0.5881, - "step": 127600 - }, - { - "epoch": 1.1281140048444986, - "grad_norm": 3.222883462905884, - "learning_rate": 3.1198099919258355e-05, - "loss": 0.7449, - "step": 127610 - }, - { - "epoch": 1.128202408104811, - "grad_norm": 1.9076906442642212, - "learning_rate": 3.119662653158649e-05, - "loss": 0.582, - "step": 127620 - }, - { - "epoch": 1.128290811365123, - "grad_norm": 3.9604201316833496, - "learning_rate": 3.119515314391462e-05, - "loss": 0.6379, - "step": 127630 - }, - { - "epoch": 1.1283792146254354, - "grad_norm": 1.8760172128677368, - "learning_rate": 3.119367975624275e-05, - "loss": 0.6786, - "step": 127640 - }, - { - "epoch": 1.1284676178857476, - "grad_norm": 2.554670810699463, - "learning_rate": 3.1192206368570875e-05, - "loss": 0.5619, - "step": 127650 - }, - { - "epoch": 1.12855602114606, - "grad_norm": 2.782709836959839, - "learning_rate": 3.1190732980899003e-05, - "loss": 0.6666, - "step": 127660 - }, - { - "epoch": 1.128644424406372, - "grad_norm": 1.3999807834625244, - "learning_rate": 3.118925959322713e-05, - "loss": 0.6457, - "step": 127670 - }, - { - "epoch": 1.1287328276666844, - "grad_norm": 1.673561692237854, - "learning_rate": 3.118778620555527e-05, - "loss": 0.5272, - "step": 127680 - }, - { - "epoch": 1.1288212309269965, - "grad_norm": 12.835931777954102, - "learning_rate": 3.118631281788339e-05, - "loss": 0.7, - "step": 127690 - }, - { - "epoch": 1.1289096341873088, - "grad_norm": 3.948513984680176, - "learning_rate": 3.1184839430211524e-05, - "loss": 0.7694, - "step": 127700 - }, - { - "epoch": 1.1289980374476212, - "grad_norm": 1.4934343099594116, - "learning_rate": 3.118336604253965e-05, - "loss": 0.5405, - "step": 127710 - }, - { - "epoch": 1.1290864407079333, - "grad_norm": 3.815197467803955, - "learning_rate": 3.118189265486778e-05, - "loss": 0.5864, - "step": 127720 - }, - { - "epoch": 1.1291748439682456, - "grad_norm": 2.613705635070801, - "learning_rate": 3.118041926719591e-05, - "loss": 0.5741, - "step": 127730 - }, - { - "epoch": 1.1292632472285578, - "grad_norm": 18.398590087890625, - "learning_rate": 3.1178945879524044e-05, - "loss": 0.5663, - "step": 127740 - }, - { - "epoch": 1.12935165048887, - "grad_norm": 1.7807130813598633, - "learning_rate": 3.1177472491852165e-05, - "loss": 0.4899, - "step": 127750 - }, - { - "epoch": 1.1294400537491822, - "grad_norm": 7.4020233154296875, - "learning_rate": 3.11759991041803e-05, - "loss": 0.6467, - "step": 127760 - }, - { - "epoch": 1.1295284570094946, - "grad_norm": 9.795546531677246, - "learning_rate": 3.117452571650842e-05, - "loss": 0.5992, - "step": 127770 - }, - { - "epoch": 1.1296168602698067, - "grad_norm": 2.897505283355713, - "learning_rate": 3.117305232883656e-05, - "loss": 0.6233, - "step": 127780 - }, - { - "epoch": 1.129705263530119, - "grad_norm": 1.5370945930480957, - "learning_rate": 3.1171578941164686e-05, - "loss": 0.5322, - "step": 127790 - }, - { - "epoch": 1.1297936667904311, - "grad_norm": 3.2173004150390625, - "learning_rate": 3.1170105553492814e-05, - "loss": 0.6435, - "step": 127800 - }, - { - "epoch": 1.1298820700507435, - "grad_norm": 2.5702590942382812, - "learning_rate": 3.116863216582094e-05, - "loss": 0.5733, - "step": 127810 - }, - { - "epoch": 1.1299704733110558, - "grad_norm": 2.9192237854003906, - "learning_rate": 3.116715877814908e-05, - "loss": 0.5339, - "step": 127820 - }, - { - "epoch": 1.130058876571368, - "grad_norm": 5.5746846199035645, - "learning_rate": 3.11656853904772e-05, - "loss": 0.7213, - "step": 127830 - }, - { - "epoch": 1.1301472798316803, - "grad_norm": 2.588975667953491, - "learning_rate": 3.1164212002805334e-05, - "loss": 0.653, - "step": 127840 - }, - { - "epoch": 1.1302356830919924, - "grad_norm": 5.150849342346191, - "learning_rate": 3.116273861513346e-05, - "loss": 0.6385, - "step": 127850 - }, - { - "epoch": 1.1303240863523047, - "grad_norm": 5.099704742431641, - "learning_rate": 3.116126522746159e-05, - "loss": 0.5256, - "step": 127860 - }, - { - "epoch": 1.1304124896126169, - "grad_norm": 1.229253888130188, - "learning_rate": 3.115979183978972e-05, - "loss": 0.5803, - "step": 127870 - }, - { - "epoch": 1.1305008928729292, - "grad_norm": 5.609333515167236, - "learning_rate": 3.115831845211785e-05, - "loss": 0.6201, - "step": 127880 - }, - { - "epoch": 1.1305892961332413, - "grad_norm": 2.342172145843506, - "learning_rate": 3.1156845064445976e-05, - "loss": 0.6521, - "step": 127890 - }, - { - "epoch": 1.1306776993935537, - "grad_norm": 3.7136483192443848, - "learning_rate": 3.115537167677411e-05, - "loss": 0.6616, - "step": 127900 - }, - { - "epoch": 1.1307661026538658, - "grad_norm": 2.246790647506714, - "learning_rate": 3.115389828910223e-05, - "loss": 0.7396, - "step": 127910 - }, - { - "epoch": 1.1308545059141781, - "grad_norm": 11.692253112792969, - "learning_rate": 3.115242490143037e-05, - "loss": 0.5945, - "step": 127920 - }, - { - "epoch": 1.1309429091744905, - "grad_norm": 8.861778259277344, - "learning_rate": 3.1150951513758496e-05, - "loss": 0.6087, - "step": 127930 - }, - { - "epoch": 1.1310313124348026, - "grad_norm": 0.8729851841926575, - "learning_rate": 3.1149478126086625e-05, - "loss": 0.5115, - "step": 127940 - }, - { - "epoch": 1.1311197156951147, - "grad_norm": 3.291654109954834, - "learning_rate": 3.114800473841475e-05, - "loss": 0.5895, - "step": 127950 - }, - { - "epoch": 1.131208118955427, - "grad_norm": 5.798988342285156, - "learning_rate": 3.114653135074289e-05, - "loss": 0.612, - "step": 127960 - }, - { - "epoch": 1.1312965222157394, - "grad_norm": 1.1860740184783936, - "learning_rate": 3.114505796307101e-05, - "loss": 0.7366, - "step": 127970 - }, - { - "epoch": 1.1313849254760515, - "grad_norm": 1.4610323905944824, - "learning_rate": 3.1143584575399145e-05, - "loss": 0.6076, - "step": 127980 - }, - { - "epoch": 1.1314733287363639, - "grad_norm": 3.5780997276306152, - "learning_rate": 3.114211118772727e-05, - "loss": 0.6438, - "step": 127990 - }, - { - "epoch": 1.131561731996676, - "grad_norm": 7.238277912139893, - "learning_rate": 3.11406378000554e-05, - "loss": 0.5945, - "step": 128000 - }, - { - "epoch": 1.1316501352569883, - "grad_norm": 6.38936710357666, - "learning_rate": 3.113916441238353e-05, - "loss": 0.6412, - "step": 128010 - }, - { - "epoch": 1.1317385385173004, - "grad_norm": 2.3094916343688965, - "learning_rate": 3.113769102471166e-05, - "loss": 0.6005, - "step": 128020 - }, - { - "epoch": 1.1318269417776128, - "grad_norm": 5.772735595703125, - "learning_rate": 3.1136217637039786e-05, - "loss": 0.6199, - "step": 128030 - }, - { - "epoch": 1.131915345037925, - "grad_norm": 2.0689494609832764, - "learning_rate": 3.113474424936792e-05, - "loss": 0.5729, - "step": 128040 - }, - { - "epoch": 1.1320037482982372, - "grad_norm": 3.396052837371826, - "learning_rate": 3.113327086169604e-05, - "loss": 0.6487, - "step": 128050 - }, - { - "epoch": 1.1320921515585494, - "grad_norm": 1.5205445289611816, - "learning_rate": 3.113179747402418e-05, - "loss": 0.608, - "step": 128060 - }, - { - "epoch": 1.1321805548188617, - "grad_norm": 0.7985290288925171, - "learning_rate": 3.113032408635231e-05, - "loss": 0.4496, - "step": 128070 - }, - { - "epoch": 1.132268958079174, - "grad_norm": 2.381638288497925, - "learning_rate": 3.1128850698680435e-05, - "loss": 0.6879, - "step": 128080 - }, - { - "epoch": 1.1323573613394862, - "grad_norm": 3.3611629009246826, - "learning_rate": 3.112737731100856e-05, - "loss": 0.5327, - "step": 128090 - }, - { - "epoch": 1.1324457645997985, - "grad_norm": 1.7304800748825073, - "learning_rate": 3.11259039233367e-05, - "loss": 0.6686, - "step": 128100 - }, - { - "epoch": 1.1325341678601106, - "grad_norm": 8.254027366638184, - "learning_rate": 3.112443053566482e-05, - "loss": 0.5926, - "step": 128110 - }, - { - "epoch": 1.132622571120423, - "grad_norm": 4.4736857414245605, - "learning_rate": 3.1122957147992955e-05, - "loss": 0.5408, - "step": 128120 - }, - { - "epoch": 1.132710974380735, - "grad_norm": 1.0886543989181519, - "learning_rate": 3.112148376032108e-05, - "loss": 0.5942, - "step": 128130 - }, - { - "epoch": 1.1327993776410474, - "grad_norm": 1.0274347066879272, - "learning_rate": 3.112001037264921e-05, - "loss": 0.5788, - "step": 128140 - }, - { - "epoch": 1.1328877809013596, - "grad_norm": 1.0781298875808716, - "learning_rate": 3.111853698497734e-05, - "loss": 0.4986, - "step": 128150 - }, - { - "epoch": 1.132976184161672, - "grad_norm": 4.785043239593506, - "learning_rate": 3.111706359730547e-05, - "loss": 0.7426, - "step": 128160 - }, - { - "epoch": 1.133064587421984, - "grad_norm": 6.955599784851074, - "learning_rate": 3.11155902096336e-05, - "loss": 0.698, - "step": 128170 - }, - { - "epoch": 1.1331529906822964, - "grad_norm": 2.1085779666900635, - "learning_rate": 3.111411682196173e-05, - "loss": 0.5651, - "step": 128180 - }, - { - "epoch": 1.1332413939426087, - "grad_norm": 7.190761089324951, - "learning_rate": 3.1112643434289854e-05, - "loss": 0.5391, - "step": 128190 - }, - { - "epoch": 1.1333297972029208, - "grad_norm": 3.2585065364837646, - "learning_rate": 3.111117004661799e-05, - "loss": 0.5222, - "step": 128200 - }, - { - "epoch": 1.1334182004632332, - "grad_norm": 3.023618459701538, - "learning_rate": 3.110969665894612e-05, - "loss": 0.5605, - "step": 128210 - }, - { - "epoch": 1.1335066037235453, - "grad_norm": 1.328873634338379, - "learning_rate": 3.1108223271274246e-05, - "loss": 0.5163, - "step": 128220 - }, - { - "epoch": 1.1335950069838576, - "grad_norm": 1.7010133266448975, - "learning_rate": 3.1106749883602374e-05, - "loss": 0.5721, - "step": 128230 - }, - { - "epoch": 1.1336834102441697, - "grad_norm": 1.680161714553833, - "learning_rate": 3.11052764959305e-05, - "loss": 0.6808, - "step": 128240 - }, - { - "epoch": 1.133771813504482, - "grad_norm": 3.0349910259246826, - "learning_rate": 3.110380310825863e-05, - "loss": 0.7132, - "step": 128250 - }, - { - "epoch": 1.1338602167647942, - "grad_norm": 4.480554103851318, - "learning_rate": 3.1102329720586766e-05, - "loss": 0.6455, - "step": 128260 - }, - { - "epoch": 1.1339486200251065, - "grad_norm": 2.1163761615753174, - "learning_rate": 3.110085633291489e-05, - "loss": 0.5604, - "step": 128270 - }, - { - "epoch": 1.1340370232854187, - "grad_norm": 4.307466983795166, - "learning_rate": 3.109938294524302e-05, - "loss": 0.7485, - "step": 128280 - }, - { - "epoch": 1.134125426545731, - "grad_norm": 2.174879789352417, - "learning_rate": 3.109790955757115e-05, - "loss": 0.6301, - "step": 128290 - }, - { - "epoch": 1.1342138298060433, - "grad_norm": 2.4866321086883545, - "learning_rate": 3.109643616989928e-05, - "loss": 0.6528, - "step": 128300 - }, - { - "epoch": 1.1343022330663555, - "grad_norm": 2.4251863956451416, - "learning_rate": 3.109496278222741e-05, - "loss": 0.7295, - "step": 128310 - }, - { - "epoch": 1.1343906363266678, - "grad_norm": 13.800593376159668, - "learning_rate": 3.109348939455554e-05, - "loss": 0.6949, - "step": 128320 - }, - { - "epoch": 1.13447903958698, - "grad_norm": 10.20455265045166, - "learning_rate": 3.1092016006883664e-05, - "loss": 0.7361, - "step": 128330 - }, - { - "epoch": 1.1345674428472923, - "grad_norm": 4.577689170837402, - "learning_rate": 3.10905426192118e-05, - "loss": 0.6818, - "step": 128340 - }, - { - "epoch": 1.1346558461076044, - "grad_norm": 2.310227155685425, - "learning_rate": 3.108906923153993e-05, - "loss": 0.6335, - "step": 128350 - }, - { - "epoch": 1.1347442493679167, - "grad_norm": 2.1640915870666504, - "learning_rate": 3.1087595843868056e-05, - "loss": 0.5092, - "step": 128360 - }, - { - "epoch": 1.1348326526282289, - "grad_norm": 4.713624477386475, - "learning_rate": 3.1086122456196184e-05, - "loss": 0.5343, - "step": 128370 - }, - { - "epoch": 1.1349210558885412, - "grad_norm": 8.862934112548828, - "learning_rate": 3.108464906852431e-05, - "loss": 0.6609, - "step": 128380 - }, - { - "epoch": 1.1350094591488533, - "grad_norm": 5.741730213165283, - "learning_rate": 3.108317568085244e-05, - "loss": 0.4951, - "step": 128390 - }, - { - "epoch": 1.1350978624091657, - "grad_norm": 4.6546735763549805, - "learning_rate": 3.1081702293180576e-05, - "loss": 0.6988, - "step": 128400 - }, - { - "epoch": 1.135186265669478, - "grad_norm": 2.6714022159576416, - "learning_rate": 3.1080228905508705e-05, - "loss": 0.5887, - "step": 128410 - }, - { - "epoch": 1.1352746689297901, - "grad_norm": 1.811808466911316, - "learning_rate": 3.107875551783683e-05, - "loss": 0.4598, - "step": 128420 - }, - { - "epoch": 1.1353630721901025, - "grad_norm": 10.22468376159668, - "learning_rate": 3.107728213016496e-05, - "loss": 0.6492, - "step": 128430 - }, - { - "epoch": 1.1354514754504146, - "grad_norm": 8.852533340454102, - "learning_rate": 3.107580874249309e-05, - "loss": 0.5588, - "step": 128440 - }, - { - "epoch": 1.135539878710727, - "grad_norm": 2.12284255027771, - "learning_rate": 3.107433535482122e-05, - "loss": 0.7217, - "step": 128450 - }, - { - "epoch": 1.135628281971039, - "grad_norm": 1.8057159185409546, - "learning_rate": 3.107286196714935e-05, - "loss": 0.6407, - "step": 128460 - }, - { - "epoch": 1.1357166852313514, - "grad_norm": 2.4709341526031494, - "learning_rate": 3.107138857947748e-05, - "loss": 0.6377, - "step": 128470 - }, - { - "epoch": 1.1358050884916635, - "grad_norm": 9.284843444824219, - "learning_rate": 3.106991519180561e-05, - "loss": 0.6803, - "step": 128480 - }, - { - "epoch": 1.1358934917519758, - "grad_norm": 1.947967767715454, - "learning_rate": 3.106844180413374e-05, - "loss": 0.6463, - "step": 128490 - }, - { - "epoch": 1.135981895012288, - "grad_norm": 1.7554666996002197, - "learning_rate": 3.1066968416461867e-05, - "loss": 0.4648, - "step": 128500 - }, - { - "epoch": 1.1360702982726003, - "grad_norm": 0.9914371967315674, - "learning_rate": 3.1065495028789995e-05, - "loss": 0.4564, - "step": 128510 - }, - { - "epoch": 1.1361587015329127, - "grad_norm": 18.34630012512207, - "learning_rate": 3.106402164111812e-05, - "loss": 0.6756, - "step": 128520 - }, - { - "epoch": 1.1362471047932248, - "grad_norm": 4.659605026245117, - "learning_rate": 3.106254825344626e-05, - "loss": 0.5377, - "step": 128530 - }, - { - "epoch": 1.136335508053537, - "grad_norm": 1.9386022090911865, - "learning_rate": 3.106107486577439e-05, - "loss": 0.6784, - "step": 128540 - }, - { - "epoch": 1.1364239113138492, - "grad_norm": 1.648189902305603, - "learning_rate": 3.1059601478102515e-05, - "loss": 0.6302, - "step": 128550 - }, - { - "epoch": 1.1365123145741616, - "grad_norm": 2.0266330242156982, - "learning_rate": 3.1058128090430643e-05, - "loss": 0.6043, - "step": 128560 - }, - { - "epoch": 1.1366007178344737, - "grad_norm": 4.102417469024658, - "learning_rate": 3.105665470275877e-05, - "loss": 0.6162, - "step": 128570 - }, - { - "epoch": 1.136689121094786, - "grad_norm": 3.8699839115142822, - "learning_rate": 3.10551813150869e-05, - "loss": 0.6195, - "step": 128580 - }, - { - "epoch": 1.1367775243550982, - "grad_norm": 2.4275920391082764, - "learning_rate": 3.1053707927415035e-05, - "loss": 0.6614, - "step": 128590 - }, - { - "epoch": 1.1368659276154105, - "grad_norm": 4.222693920135498, - "learning_rate": 3.105223453974316e-05, - "loss": 0.5581, - "step": 128600 - }, - { - "epoch": 1.1369543308757226, - "grad_norm": 2.011685848236084, - "learning_rate": 3.105076115207129e-05, - "loss": 0.5598, - "step": 128610 - }, - { - "epoch": 1.137042734136035, - "grad_norm": 2.6063008308410645, - "learning_rate": 3.104928776439942e-05, - "loss": 0.6228, - "step": 128620 - }, - { - "epoch": 1.137131137396347, - "grad_norm": 4.440762996673584, - "learning_rate": 3.104781437672755e-05, - "loss": 0.665, - "step": 128630 - }, - { - "epoch": 1.1372195406566594, - "grad_norm": 3.1974143981933594, - "learning_rate": 3.104634098905568e-05, - "loss": 0.7242, - "step": 128640 - }, - { - "epoch": 1.1373079439169715, - "grad_norm": 1.4081751108169556, - "learning_rate": 3.104486760138381e-05, - "loss": 0.523, - "step": 128650 - }, - { - "epoch": 1.1373963471772839, - "grad_norm": 2.1216461658477783, - "learning_rate": 3.1043394213711934e-05, - "loss": 0.7082, - "step": 128660 - }, - { - "epoch": 1.1374847504375962, - "grad_norm": 1.7226834297180176, - "learning_rate": 3.104192082604007e-05, - "loss": 0.6275, - "step": 128670 - }, - { - "epoch": 1.1375731536979083, - "grad_norm": 2.572516441345215, - "learning_rate": 3.10404474383682e-05, - "loss": 0.6339, - "step": 128680 - }, - { - "epoch": 1.1376615569582207, - "grad_norm": 1.8610177040100098, - "learning_rate": 3.1038974050696326e-05, - "loss": 0.5018, - "step": 128690 - }, - { - "epoch": 1.1377499602185328, - "grad_norm": 1.7649964094161987, - "learning_rate": 3.1037500663024454e-05, - "loss": 0.7139, - "step": 128700 - }, - { - "epoch": 1.1378383634788451, - "grad_norm": 0.8393646478652954, - "learning_rate": 3.103602727535258e-05, - "loss": 0.6238, - "step": 128710 - }, - { - "epoch": 1.1379267667391573, - "grad_norm": 3.8245954513549805, - "learning_rate": 3.103455388768071e-05, - "loss": 0.5752, - "step": 128720 - }, - { - "epoch": 1.1380151699994696, - "grad_norm": 9.85477352142334, - "learning_rate": 3.1033080500008846e-05, - "loss": 0.7119, - "step": 128730 - }, - { - "epoch": 1.1381035732597817, - "grad_norm": 4.801961421966553, - "learning_rate": 3.103160711233697e-05, - "loss": 0.6302, - "step": 128740 - }, - { - "epoch": 1.138191976520094, - "grad_norm": 2.0827114582061768, - "learning_rate": 3.10301337246651e-05, - "loss": 0.5871, - "step": 128750 - }, - { - "epoch": 1.1382803797804062, - "grad_norm": 1.2859481573104858, - "learning_rate": 3.102866033699323e-05, - "loss": 0.5953, - "step": 128760 - }, - { - "epoch": 1.1383687830407185, - "grad_norm": 5.2473931312561035, - "learning_rate": 3.102718694932136e-05, - "loss": 0.7347, - "step": 128770 - }, - { - "epoch": 1.1384571863010309, - "grad_norm": 5.208019733428955, - "learning_rate": 3.102571356164949e-05, - "loss": 0.6063, - "step": 128780 - }, - { - "epoch": 1.138545589561343, - "grad_norm": 1.5457022190093994, - "learning_rate": 3.102424017397762e-05, - "loss": 0.7643, - "step": 128790 - }, - { - "epoch": 1.1386339928216553, - "grad_norm": 2.7987060546875, - "learning_rate": 3.1022766786305744e-05, - "loss": 0.5569, - "step": 128800 - }, - { - "epoch": 1.1387223960819675, - "grad_norm": 4.447319030761719, - "learning_rate": 3.102129339863388e-05, - "loss": 0.5638, - "step": 128810 - }, - { - "epoch": 1.1388107993422798, - "grad_norm": 0.7671382427215576, - "learning_rate": 3.101982001096201e-05, - "loss": 0.5687, - "step": 128820 - }, - { - "epoch": 1.138899202602592, - "grad_norm": 1.4144881963729858, - "learning_rate": 3.1018346623290136e-05, - "loss": 0.7059, - "step": 128830 - }, - { - "epoch": 1.1389876058629043, - "grad_norm": 1.6497530937194824, - "learning_rate": 3.1016873235618264e-05, - "loss": 0.608, - "step": 128840 - }, - { - "epoch": 1.1390760091232164, - "grad_norm": 2.7800369262695312, - "learning_rate": 3.101539984794639e-05, - "loss": 0.6789, - "step": 128850 - }, - { - "epoch": 1.1391644123835287, - "grad_norm": 3.221393346786499, - "learning_rate": 3.101392646027452e-05, - "loss": 0.6814, - "step": 128860 - }, - { - "epoch": 1.1392528156438408, - "grad_norm": 1.2543436288833618, - "learning_rate": 3.1012453072602656e-05, - "loss": 0.6242, - "step": 128870 - }, - { - "epoch": 1.1393412189041532, - "grad_norm": 2.0788819789886475, - "learning_rate": 3.101097968493078e-05, - "loss": 0.6054, - "step": 128880 - }, - { - "epoch": 1.1394296221644655, - "grad_norm": 2.0430173873901367, - "learning_rate": 3.100950629725891e-05, - "loss": 0.6817, - "step": 128890 - }, - { - "epoch": 1.1395180254247776, - "grad_norm": 1.8807059526443481, - "learning_rate": 3.100803290958704e-05, - "loss": 0.5536, - "step": 128900 - }, - { - "epoch": 1.13960642868509, - "grad_norm": 5.936976432800293, - "learning_rate": 3.100655952191517e-05, - "loss": 0.5814, - "step": 128910 - }, - { - "epoch": 1.139694831945402, - "grad_norm": 11.144386291503906, - "learning_rate": 3.10050861342433e-05, - "loss": 0.5457, - "step": 128920 - }, - { - "epoch": 1.1397832352057145, - "grad_norm": 1.185876488685608, - "learning_rate": 3.100361274657143e-05, - "loss": 0.6976, - "step": 128930 - }, - { - "epoch": 1.1398716384660266, - "grad_norm": 1.4058983325958252, - "learning_rate": 3.1002139358899555e-05, - "loss": 0.7023, - "step": 128940 - }, - { - "epoch": 1.139960041726339, - "grad_norm": 3.231215000152588, - "learning_rate": 3.100066597122769e-05, - "loss": 0.5315, - "step": 128950 - }, - { - "epoch": 1.140048444986651, - "grad_norm": 5.5960164070129395, - "learning_rate": 3.099919258355581e-05, - "loss": 0.6117, - "step": 128960 - }, - { - "epoch": 1.1401368482469634, - "grad_norm": 5.824815273284912, - "learning_rate": 3.099771919588395e-05, - "loss": 0.7051, - "step": 128970 - }, - { - "epoch": 1.1402252515072755, - "grad_norm": 4.238917827606201, - "learning_rate": 3.0996245808212075e-05, - "loss": 0.5019, - "step": 128980 - }, - { - "epoch": 1.1403136547675878, - "grad_norm": 1.0785647630691528, - "learning_rate": 3.09947724205402e-05, - "loss": 0.6906, - "step": 128990 - }, - { - "epoch": 1.1404020580279002, - "grad_norm": 2.795828104019165, - "learning_rate": 3.099329903286833e-05, - "loss": 0.627, - "step": 129000 - }, - { - "epoch": 1.1404904612882123, - "grad_norm": 5.802361488342285, - "learning_rate": 3.099182564519647e-05, - "loss": 0.4534, - "step": 129010 - }, - { - "epoch": 1.1405788645485246, - "grad_norm": 6.869480133056641, - "learning_rate": 3.099035225752459e-05, - "loss": 0.5706, - "step": 129020 - }, - { - "epoch": 1.1406672678088368, - "grad_norm": 0.9857071042060852, - "learning_rate": 3.0988878869852724e-05, - "loss": 0.5821, - "step": 129030 - }, - { - "epoch": 1.140755671069149, - "grad_norm": 2.4715664386749268, - "learning_rate": 3.098740548218085e-05, - "loss": 0.6485, - "step": 129040 - }, - { - "epoch": 1.1408440743294612, - "grad_norm": 9.887063026428223, - "learning_rate": 3.098593209450898e-05, - "loss": 0.581, - "step": 129050 - }, - { - "epoch": 1.1409324775897736, - "grad_norm": 4.94040060043335, - "learning_rate": 3.098445870683711e-05, - "loss": 0.6784, - "step": 129060 - }, - { - "epoch": 1.1410208808500857, - "grad_norm": 13.735101699829102, - "learning_rate": 3.098298531916524e-05, - "loss": 0.6245, - "step": 129070 - }, - { - "epoch": 1.141109284110398, - "grad_norm": 1.6162124872207642, - "learning_rate": 3.0981511931493365e-05, - "loss": 0.7017, - "step": 129080 - }, - { - "epoch": 1.1411976873707101, - "grad_norm": 5.409655570983887, - "learning_rate": 3.09800385438215e-05, - "loss": 0.5664, - "step": 129090 - }, - { - "epoch": 1.1412860906310225, - "grad_norm": 1.40962815284729, - "learning_rate": 3.097856515614962e-05, - "loss": 0.5777, - "step": 129100 - }, - { - "epoch": 1.1413744938913348, - "grad_norm": 10.75178050994873, - "learning_rate": 3.097709176847776e-05, - "loss": 0.7643, - "step": 129110 - }, - { - "epoch": 1.141462897151647, - "grad_norm": 2.588787794113159, - "learning_rate": 3.0975618380805885e-05, - "loss": 0.5151, - "step": 129120 - }, - { - "epoch": 1.141551300411959, - "grad_norm": 1.3616329431533813, - "learning_rate": 3.0974144993134014e-05, - "loss": 0.6456, - "step": 129130 - }, - { - "epoch": 1.1416397036722714, - "grad_norm": 9.743754386901855, - "learning_rate": 3.097267160546214e-05, - "loss": 0.7101, - "step": 129140 - }, - { - "epoch": 1.1417281069325838, - "grad_norm": 6.239378929138184, - "learning_rate": 3.097119821779028e-05, - "loss": 0.6694, - "step": 129150 - }, - { - "epoch": 1.1418165101928959, - "grad_norm": 3.233586549758911, - "learning_rate": 3.09697248301184e-05, - "loss": 0.6558, - "step": 129160 - }, - { - "epoch": 1.1419049134532082, - "grad_norm": 3.038461923599243, - "learning_rate": 3.0968251442446534e-05, - "loss": 0.6131, - "step": 129170 - }, - { - "epoch": 1.1419933167135203, - "grad_norm": 2.079636335372925, - "learning_rate": 3.0966778054774656e-05, - "loss": 0.5607, - "step": 129180 - }, - { - "epoch": 1.1420817199738327, - "grad_norm": 5.6674370765686035, - "learning_rate": 3.096530466710279e-05, - "loss": 0.698, - "step": 129190 - }, - { - "epoch": 1.1421701232341448, - "grad_norm": 1.4439177513122559, - "learning_rate": 3.096383127943092e-05, - "loss": 0.6141, - "step": 129200 - }, - { - "epoch": 1.1422585264944571, - "grad_norm": 1.557323694229126, - "learning_rate": 3.096235789175905e-05, - "loss": 0.6327, - "step": 129210 - }, - { - "epoch": 1.1423469297547695, - "grad_norm": 1.6232562065124512, - "learning_rate": 3.0960884504087176e-05, - "loss": 0.7038, - "step": 129220 - }, - { - "epoch": 1.1424353330150816, - "grad_norm": 1.7543247938156128, - "learning_rate": 3.095941111641531e-05, - "loss": 0.5627, - "step": 129230 - }, - { - "epoch": 1.1425237362753937, - "grad_norm": 1.365300178527832, - "learning_rate": 3.095793772874343e-05, - "loss": 0.643, - "step": 129240 - }, - { - "epoch": 1.142612139535706, - "grad_norm": 1.8272337913513184, - "learning_rate": 3.095646434107157e-05, - "loss": 0.6104, - "step": 129250 - }, - { - "epoch": 1.1427005427960184, - "grad_norm": 2.6397464275360107, - "learning_rate": 3.0954990953399696e-05, - "loss": 0.6511, - "step": 129260 - }, - { - "epoch": 1.1427889460563305, - "grad_norm": 4.559430122375488, - "learning_rate": 3.0953517565727824e-05, - "loss": 0.6548, - "step": 129270 - }, - { - "epoch": 1.1428773493166429, - "grad_norm": 2.175283670425415, - "learning_rate": 3.095204417805595e-05, - "loss": 0.6311, - "step": 129280 - }, - { - "epoch": 1.142965752576955, - "grad_norm": 1.8018583059310913, - "learning_rate": 3.095057079038409e-05, - "loss": 0.4994, - "step": 129290 - }, - { - "epoch": 1.1430541558372673, - "grad_norm": 1.4794366359710693, - "learning_rate": 3.094909740271221e-05, - "loss": 0.6195, - "step": 129300 - }, - { - "epoch": 1.1431425590975794, - "grad_norm": 1.8551850318908691, - "learning_rate": 3.0947624015040345e-05, - "loss": 0.6823, - "step": 129310 - }, - { - "epoch": 1.1432309623578918, - "grad_norm": 2.381648540496826, - "learning_rate": 3.094615062736847e-05, - "loss": 0.5987, - "step": 129320 - }, - { - "epoch": 1.143319365618204, - "grad_norm": 1.3825405836105347, - "learning_rate": 3.09446772396966e-05, - "loss": 0.7014, - "step": 129330 - }, - { - "epoch": 1.1434077688785163, - "grad_norm": 4.019542694091797, - "learning_rate": 3.094320385202473e-05, - "loss": 0.7687, - "step": 129340 - }, - { - "epoch": 1.1434961721388284, - "grad_norm": 7.227363586425781, - "learning_rate": 3.094173046435286e-05, - "loss": 0.5195, - "step": 129350 - }, - { - "epoch": 1.1435845753991407, - "grad_norm": 2.040146589279175, - "learning_rate": 3.0940257076680986e-05, - "loss": 0.6398, - "step": 129360 - }, - { - "epoch": 1.143672978659453, - "grad_norm": 1.8154550790786743, - "learning_rate": 3.093878368900912e-05, - "loss": 0.6957, - "step": 129370 - }, - { - "epoch": 1.1437613819197652, - "grad_norm": 5.321227073669434, - "learning_rate": 3.093731030133725e-05, - "loss": 0.5996, - "step": 129380 - }, - { - "epoch": 1.1438497851800775, - "grad_norm": 3.097691535949707, - "learning_rate": 3.093583691366538e-05, - "loss": 0.534, - "step": 129390 - }, - { - "epoch": 1.1439381884403896, - "grad_norm": 0.9084679484367371, - "learning_rate": 3.0934363525993506e-05, - "loss": 0.5166, - "step": 129400 - }, - { - "epoch": 1.144026591700702, - "grad_norm": 4.590011119842529, - "learning_rate": 3.0932890138321635e-05, - "loss": 0.6488, - "step": 129410 - }, - { - "epoch": 1.144114994961014, - "grad_norm": 2.5000808238983154, - "learning_rate": 3.093141675064976e-05, - "loss": 0.5468, - "step": 129420 - }, - { - "epoch": 1.1442033982213264, - "grad_norm": 4.600218296051025, - "learning_rate": 3.092994336297789e-05, - "loss": 0.7062, - "step": 129430 - }, - { - "epoch": 1.1442918014816386, - "grad_norm": 6.936985969543457, - "learning_rate": 3.092846997530603e-05, - "loss": 0.5943, - "step": 129440 - }, - { - "epoch": 1.144380204741951, - "grad_norm": 3.0413055419921875, - "learning_rate": 3.0926996587634155e-05, - "loss": 0.7356, - "step": 129450 - }, - { - "epoch": 1.144468608002263, - "grad_norm": 2.9137046337127686, - "learning_rate": 3.0925523199962283e-05, - "loss": 0.6307, - "step": 129460 - }, - { - "epoch": 1.1445570112625754, - "grad_norm": 3.0789620876312256, - "learning_rate": 3.092404981229041e-05, - "loss": 0.6423, - "step": 129470 - }, - { - "epoch": 1.1446454145228877, - "grad_norm": 1.3038885593414307, - "learning_rate": 3.092257642461854e-05, - "loss": 0.5895, - "step": 129480 - }, - { - "epoch": 1.1447338177831998, - "grad_norm": 11.934042930603027, - "learning_rate": 3.092110303694667e-05, - "loss": 0.5973, - "step": 129490 - }, - { - "epoch": 1.1448222210435122, - "grad_norm": 1.8368771076202393, - "learning_rate": 3.0919629649274804e-05, - "loss": 0.6139, - "step": 129500 - }, - { - "epoch": 1.1449106243038243, - "grad_norm": 1.5556972026824951, - "learning_rate": 3.091815626160293e-05, - "loss": 0.5033, - "step": 129510 - }, - { - "epoch": 1.1449990275641366, - "grad_norm": 1.673009991645813, - "learning_rate": 3.091668287393106e-05, - "loss": 0.7569, - "step": 129520 - }, - { - "epoch": 1.1450874308244487, - "grad_norm": 2.1208839416503906, - "learning_rate": 3.091520948625919e-05, - "loss": 0.4855, - "step": 129530 - }, - { - "epoch": 1.145175834084761, - "grad_norm": 1.4348490238189697, - "learning_rate": 3.091373609858732e-05, - "loss": 0.7457, - "step": 129540 - }, - { - "epoch": 1.1452642373450732, - "grad_norm": 2.5995562076568604, - "learning_rate": 3.0912262710915445e-05, - "loss": 0.7044, - "step": 129550 - }, - { - "epoch": 1.1453526406053856, - "grad_norm": 4.249809741973877, - "learning_rate": 3.091078932324358e-05, - "loss": 0.5688, - "step": 129560 - }, - { - "epoch": 1.1454410438656977, - "grad_norm": 2.1283082962036133, - "learning_rate": 3.09093159355717e-05, - "loss": 0.6918, - "step": 129570 - }, - { - "epoch": 1.14552944712601, - "grad_norm": 3.371137857437134, - "learning_rate": 3.090784254789984e-05, - "loss": 0.6069, - "step": 129580 - }, - { - "epoch": 1.1456178503863224, - "grad_norm": 2.4240896701812744, - "learning_rate": 3.0906369160227966e-05, - "loss": 0.536, - "step": 129590 - }, - { - "epoch": 1.1457062536466345, - "grad_norm": 2.010716676712036, - "learning_rate": 3.0904895772556094e-05, - "loss": 0.5669, - "step": 129600 - }, - { - "epoch": 1.1457946569069468, - "grad_norm": 3.1237964630126953, - "learning_rate": 3.090342238488422e-05, - "loss": 0.5819, - "step": 129610 - }, - { - "epoch": 1.145883060167259, - "grad_norm": 3.414632558822632, - "learning_rate": 3.090194899721236e-05, - "loss": 0.6437, - "step": 129620 - }, - { - "epoch": 1.1459714634275713, - "grad_norm": 13.128074645996094, - "learning_rate": 3.090047560954048e-05, - "loss": 0.6882, - "step": 129630 - }, - { - "epoch": 1.1460598666878834, - "grad_norm": 9.342219352722168, - "learning_rate": 3.0899002221868614e-05, - "loss": 0.6222, - "step": 129640 - }, - { - "epoch": 1.1461482699481957, - "grad_norm": 2.1535446643829346, - "learning_rate": 3.0897528834196736e-05, - "loss": 0.5809, - "step": 129650 - }, - { - "epoch": 1.1462366732085079, - "grad_norm": 9.27763843536377, - "learning_rate": 3.089605544652487e-05, - "loss": 0.5984, - "step": 129660 - }, - { - "epoch": 1.1463250764688202, - "grad_norm": 1.8992501497268677, - "learning_rate": 3.0894582058853e-05, - "loss": 0.5369, - "step": 129670 - }, - { - "epoch": 1.1464134797291323, - "grad_norm": 2.566413402557373, - "learning_rate": 3.089310867118113e-05, - "loss": 0.6067, - "step": 129680 - }, - { - "epoch": 1.1465018829894447, - "grad_norm": 1.1106303930282593, - "learning_rate": 3.0891635283509256e-05, - "loss": 0.7396, - "step": 129690 - }, - { - "epoch": 1.146590286249757, - "grad_norm": 1.3200767040252686, - "learning_rate": 3.089016189583739e-05, - "loss": 0.5962, - "step": 129700 - }, - { - "epoch": 1.1466786895100691, - "grad_norm": 1.7844996452331543, - "learning_rate": 3.088868850816551e-05, - "loss": 0.5788, - "step": 129710 - }, - { - "epoch": 1.1467670927703815, - "grad_norm": 2.092609167098999, - "learning_rate": 3.088721512049365e-05, - "loss": 0.5229, - "step": 129720 - }, - { - "epoch": 1.1468554960306936, - "grad_norm": 3.077134609222412, - "learning_rate": 3.0885741732821776e-05, - "loss": 0.6544, - "step": 129730 - }, - { - "epoch": 1.146943899291006, - "grad_norm": 14.96422004699707, - "learning_rate": 3.0884268345149904e-05, - "loss": 0.5527, - "step": 129740 - }, - { - "epoch": 1.147032302551318, - "grad_norm": 5.837390422821045, - "learning_rate": 3.088279495747803e-05, - "loss": 0.6023, - "step": 129750 - }, - { - "epoch": 1.1471207058116304, - "grad_norm": 2.97983455657959, - "learning_rate": 3.088132156980617e-05, - "loss": 0.6348, - "step": 129760 - }, - { - "epoch": 1.1472091090719425, - "grad_norm": 2.1689441204071045, - "learning_rate": 3.087984818213429e-05, - "loss": 0.6095, - "step": 129770 - }, - { - "epoch": 1.1472975123322549, - "grad_norm": 3.8452553749084473, - "learning_rate": 3.0878374794462425e-05, - "loss": 0.6871, - "step": 129780 - }, - { - "epoch": 1.147385915592567, - "grad_norm": 6.251856803894043, - "learning_rate": 3.0876901406790546e-05, - "loss": 0.6768, - "step": 129790 - }, - { - "epoch": 1.1474743188528793, - "grad_norm": 4.925457954406738, - "learning_rate": 3.087542801911868e-05, - "loss": 0.7806, - "step": 129800 - }, - { - "epoch": 1.1475627221131917, - "grad_norm": 4.261886119842529, - "learning_rate": 3.087395463144681e-05, - "loss": 0.6236, - "step": 129810 - }, - { - "epoch": 1.1476511253735038, - "grad_norm": 4.9891862869262695, - "learning_rate": 3.087248124377494e-05, - "loss": 0.5637, - "step": 129820 - }, - { - "epoch": 1.147739528633816, - "grad_norm": 4.105252265930176, - "learning_rate": 3.0871007856103066e-05, - "loss": 0.6118, - "step": 129830 - }, - { - "epoch": 1.1478279318941282, - "grad_norm": 2.1113710403442383, - "learning_rate": 3.08695344684312e-05, - "loss": 0.4702, - "step": 129840 - }, - { - "epoch": 1.1479163351544406, - "grad_norm": 9.12517261505127, - "learning_rate": 3.086806108075932e-05, - "loss": 0.4841, - "step": 129850 - }, - { - "epoch": 1.1480047384147527, - "grad_norm": 1.4305143356323242, - "learning_rate": 3.086658769308746e-05, - "loss": 0.4818, - "step": 129860 - }, - { - "epoch": 1.148093141675065, - "grad_norm": 1.1121559143066406, - "learning_rate": 3.0865114305415587e-05, - "loss": 0.6366, - "step": 129870 - }, - { - "epoch": 1.1481815449353772, - "grad_norm": 2.573345899581909, - "learning_rate": 3.0863640917743715e-05, - "loss": 0.7032, - "step": 129880 - }, - { - "epoch": 1.1482699481956895, - "grad_norm": 1.0719095468521118, - "learning_rate": 3.086216753007184e-05, - "loss": 0.4951, - "step": 129890 - }, - { - "epoch": 1.1483583514560016, - "grad_norm": 4.711009979248047, - "learning_rate": 3.086069414239997e-05, - "loss": 0.5405, - "step": 129900 - }, - { - "epoch": 1.148446754716314, - "grad_norm": 2.915990114212036, - "learning_rate": 3.08592207547281e-05, - "loss": 0.6023, - "step": 129910 - }, - { - "epoch": 1.148535157976626, - "grad_norm": 3.4980132579803467, - "learning_rate": 3.0857747367056235e-05, - "loss": 0.6093, - "step": 129920 - }, - { - "epoch": 1.1486235612369384, - "grad_norm": 3.3230602741241455, - "learning_rate": 3.085627397938436e-05, - "loss": 0.646, - "step": 129930 - }, - { - "epoch": 1.1487119644972505, - "grad_norm": 2.237851858139038, - "learning_rate": 3.085480059171249e-05, - "loss": 0.5086, - "step": 129940 - }, - { - "epoch": 1.148800367757563, - "grad_norm": 1.7973872423171997, - "learning_rate": 3.085332720404062e-05, - "loss": 0.5327, - "step": 129950 - }, - { - "epoch": 1.1488887710178752, - "grad_norm": 6.047544956207275, - "learning_rate": 3.085185381636875e-05, - "loss": 0.5857, - "step": 129960 - }, - { - "epoch": 1.1489771742781874, - "grad_norm": 1.8672459125518799, - "learning_rate": 3.085038042869688e-05, - "loss": 0.7111, - "step": 129970 - }, - { - "epoch": 1.1490655775384997, - "grad_norm": 1.6612030267715454, - "learning_rate": 3.084890704102501e-05, - "loss": 0.5196, - "step": 129980 - }, - { - "epoch": 1.1491539807988118, - "grad_norm": 4.564487457275391, - "learning_rate": 3.0847433653353134e-05, - "loss": 0.5518, - "step": 129990 - }, - { - "epoch": 1.1492423840591242, - "grad_norm": 1.430924892425537, - "learning_rate": 3.084596026568127e-05, - "loss": 0.5857, - "step": 130000 - }, - { - "epoch": 1.1493307873194363, - "grad_norm": 1.7929118871688843, - "learning_rate": 3.084448687800939e-05, - "loss": 0.5652, - "step": 130010 - }, - { - "epoch": 1.1494191905797486, - "grad_norm": 1.4924002885818481, - "learning_rate": 3.0843013490337525e-05, - "loss": 0.6504, - "step": 130020 - }, - { - "epoch": 1.1495075938400607, - "grad_norm": 5.29504919052124, - "learning_rate": 3.0841540102665654e-05, - "loss": 0.6984, - "step": 130030 - }, - { - "epoch": 1.149595997100373, - "grad_norm": 1.9065515995025635, - "learning_rate": 3.084006671499378e-05, - "loss": 0.6376, - "step": 130040 - }, - { - "epoch": 1.1496844003606852, - "grad_norm": 4.07283878326416, - "learning_rate": 3.083859332732191e-05, - "loss": 0.7325, - "step": 130050 - }, - { - "epoch": 1.1497728036209975, - "grad_norm": 1.8085373640060425, - "learning_rate": 3.0837119939650046e-05, - "loss": 0.5452, - "step": 130060 - }, - { - "epoch": 1.1498612068813099, - "grad_norm": 3.3446431159973145, - "learning_rate": 3.083564655197817e-05, - "loss": 0.5984, - "step": 130070 - }, - { - "epoch": 1.149949610141622, - "grad_norm": 1.0745528936386108, - "learning_rate": 3.08341731643063e-05, - "loss": 0.6564, - "step": 130080 - }, - { - "epoch": 1.1500380134019343, - "grad_norm": 3.416633129119873, - "learning_rate": 3.083269977663443e-05, - "loss": 0.6429, - "step": 130090 - }, - { - "epoch": 1.1501264166622465, - "grad_norm": 1.4294919967651367, - "learning_rate": 3.083122638896256e-05, - "loss": 0.5988, - "step": 130100 - }, - { - "epoch": 1.1502148199225588, - "grad_norm": 3.212697744369507, - "learning_rate": 3.082975300129069e-05, - "loss": 0.6303, - "step": 130110 - }, - { - "epoch": 1.150303223182871, - "grad_norm": 2.286608934402466, - "learning_rate": 3.0828279613618816e-05, - "loss": 0.6174, - "step": 130120 - }, - { - "epoch": 1.1503916264431833, - "grad_norm": 1.804093837738037, - "learning_rate": 3.0826806225946944e-05, - "loss": 0.7295, - "step": 130130 - }, - { - "epoch": 1.1504800297034954, - "grad_norm": 1.5259981155395508, - "learning_rate": 3.082533283827508e-05, - "loss": 0.6263, - "step": 130140 - }, - { - "epoch": 1.1505684329638077, - "grad_norm": 3.9004428386688232, - "learning_rate": 3.08238594506032e-05, - "loss": 0.7751, - "step": 130150 - }, - { - "epoch": 1.1506568362241198, - "grad_norm": 3.0532617568969727, - "learning_rate": 3.0822386062931336e-05, - "loss": 0.5985, - "step": 130160 - }, - { - "epoch": 1.1507452394844322, - "grad_norm": 1.6736780405044556, - "learning_rate": 3.0820912675259464e-05, - "loss": 0.6115, - "step": 130170 - }, - { - "epoch": 1.1508336427447445, - "grad_norm": 4.148414134979248, - "learning_rate": 3.081943928758759e-05, - "loss": 0.6236, - "step": 130180 - }, - { - "epoch": 1.1509220460050567, - "grad_norm": 5.934894561767578, - "learning_rate": 3.081796589991572e-05, - "loss": 0.4944, - "step": 130190 - }, - { - "epoch": 1.151010449265369, - "grad_norm": 2.424107313156128, - "learning_rate": 3.0816492512243856e-05, - "loss": 0.6583, - "step": 130200 - }, - { - "epoch": 1.1510988525256811, - "grad_norm": 3.375579833984375, - "learning_rate": 3.081501912457198e-05, - "loss": 0.6433, - "step": 130210 - }, - { - "epoch": 1.1511872557859935, - "grad_norm": 12.654208183288574, - "learning_rate": 3.081354573690011e-05, - "loss": 0.6515, - "step": 130220 - }, - { - "epoch": 1.1512756590463056, - "grad_norm": 7.625489711761475, - "learning_rate": 3.081207234922824e-05, - "loss": 0.562, - "step": 130230 - }, - { - "epoch": 1.151364062306618, - "grad_norm": 2.1072824001312256, - "learning_rate": 3.081059896155637e-05, - "loss": 0.5538, - "step": 130240 - }, - { - "epoch": 1.15145246556693, - "grad_norm": 8.0526123046875, - "learning_rate": 3.08091255738845e-05, - "loss": 0.7138, - "step": 130250 - }, - { - "epoch": 1.1515408688272424, - "grad_norm": 1.326537847518921, - "learning_rate": 3.0807652186212626e-05, - "loss": 0.5371, - "step": 130260 - }, - { - "epoch": 1.1516292720875545, - "grad_norm": 3.9905776977539062, - "learning_rate": 3.0806178798540755e-05, - "loss": 0.6641, - "step": 130270 - }, - { - "epoch": 1.1517176753478668, - "grad_norm": 8.161019325256348, - "learning_rate": 3.080470541086889e-05, - "loss": 0.6056, - "step": 130280 - }, - { - "epoch": 1.1518060786081792, - "grad_norm": 8.429973602294922, - "learning_rate": 3.080323202319702e-05, - "loss": 0.6007, - "step": 130290 - }, - { - "epoch": 1.1518944818684913, - "grad_norm": 2.276729106903076, - "learning_rate": 3.0801758635525146e-05, - "loss": 0.6495, - "step": 130300 - }, - { - "epoch": 1.1519828851288036, - "grad_norm": 2.459988594055176, - "learning_rate": 3.0800285247853275e-05, - "loss": 0.5556, - "step": 130310 - }, - { - "epoch": 1.1520712883891158, - "grad_norm": 1.8681766986846924, - "learning_rate": 3.07988118601814e-05, - "loss": 0.7208, - "step": 130320 - }, - { - "epoch": 1.152159691649428, - "grad_norm": 1.3446437120437622, - "learning_rate": 3.079733847250953e-05, - "loss": 0.4902, - "step": 130330 - }, - { - "epoch": 1.1522480949097402, - "grad_norm": 8.097925186157227, - "learning_rate": 3.079586508483767e-05, - "loss": 0.6125, - "step": 130340 - }, - { - "epoch": 1.1523364981700526, - "grad_norm": 3.3305063247680664, - "learning_rate": 3.0794391697165795e-05, - "loss": 0.6731, - "step": 130350 - }, - { - "epoch": 1.1524249014303647, - "grad_norm": 12.64484977722168, - "learning_rate": 3.079291830949392e-05, - "loss": 0.5832, - "step": 130360 - }, - { - "epoch": 1.152513304690677, - "grad_norm": 3.750962972640991, - "learning_rate": 3.079144492182205e-05, - "loss": 0.6567, - "step": 130370 - }, - { - "epoch": 1.1526017079509892, - "grad_norm": 3.4089438915252686, - "learning_rate": 3.078997153415018e-05, - "loss": 0.5302, - "step": 130380 - }, - { - "epoch": 1.1526901112113015, - "grad_norm": 3.4216701984405518, - "learning_rate": 3.078849814647831e-05, - "loss": 0.6514, - "step": 130390 - }, - { - "epoch": 1.1527785144716138, - "grad_norm": 1.9582709074020386, - "learning_rate": 3.078702475880644e-05, - "loss": 0.7527, - "step": 130400 - }, - { - "epoch": 1.152866917731926, - "grad_norm": 2.0037267208099365, - "learning_rate": 3.078555137113457e-05, - "loss": 0.5566, - "step": 130410 - }, - { - "epoch": 1.152955320992238, - "grad_norm": 2.7218093872070312, - "learning_rate": 3.07840779834627e-05, - "loss": 0.6755, - "step": 130420 - }, - { - "epoch": 1.1530437242525504, - "grad_norm": 3.4097325801849365, - "learning_rate": 3.078260459579083e-05, - "loss": 0.666, - "step": 130430 - }, - { - "epoch": 1.1531321275128628, - "grad_norm": 2.1065242290496826, - "learning_rate": 3.078113120811896e-05, - "loss": 0.6265, - "step": 130440 - }, - { - "epoch": 1.1532205307731749, - "grad_norm": 5.590689182281494, - "learning_rate": 3.0779657820447085e-05, - "loss": 0.635, - "step": 130450 - }, - { - "epoch": 1.1533089340334872, - "grad_norm": 1.4026074409484863, - "learning_rate": 3.0778184432775214e-05, - "loss": 0.6955, - "step": 130460 - }, - { - "epoch": 1.1533973372937993, - "grad_norm": 1.5446447134017944, - "learning_rate": 3.077671104510335e-05, - "loss": 0.5637, - "step": 130470 - }, - { - "epoch": 1.1534857405541117, - "grad_norm": 2.288461685180664, - "learning_rate": 3.077523765743147e-05, - "loss": 0.5959, - "step": 130480 - }, - { - "epoch": 1.1535741438144238, - "grad_norm": 2.1392714977264404, - "learning_rate": 3.0773764269759605e-05, - "loss": 0.5121, - "step": 130490 - }, - { - "epoch": 1.1536625470747361, - "grad_norm": 2.8267087936401367, - "learning_rate": 3.0772290882087734e-05, - "loss": 0.5936, - "step": 130500 - }, - { - "epoch": 1.1537509503350483, - "grad_norm": 0.7872088551521301, - "learning_rate": 3.077081749441586e-05, - "loss": 0.6218, - "step": 130510 - }, - { - "epoch": 1.1538393535953606, - "grad_norm": 4.260744571685791, - "learning_rate": 3.076934410674399e-05, - "loss": 0.6814, - "step": 130520 - }, - { - "epoch": 1.1539277568556727, - "grad_norm": 5.207024097442627, - "learning_rate": 3.0767870719072126e-05, - "loss": 0.6387, - "step": 130530 - }, - { - "epoch": 1.154016160115985, - "grad_norm": 2.6019461154937744, - "learning_rate": 3.076639733140025e-05, - "loss": 0.6577, - "step": 130540 - }, - { - "epoch": 1.1541045633762974, - "grad_norm": 5.212896823883057, - "learning_rate": 3.076492394372838e-05, - "loss": 0.6266, - "step": 130550 - }, - { - "epoch": 1.1541929666366095, - "grad_norm": 4.378382205963135, - "learning_rate": 3.076345055605651e-05, - "loss": 0.7377, - "step": 130560 - }, - { - "epoch": 1.1542813698969219, - "grad_norm": 1.3188276290893555, - "learning_rate": 3.076197716838464e-05, - "loss": 0.6517, - "step": 130570 - }, - { - "epoch": 1.154369773157234, - "grad_norm": 1.741511583328247, - "learning_rate": 3.076050378071277e-05, - "loss": 0.5072, - "step": 130580 - }, - { - "epoch": 1.1544581764175463, - "grad_norm": 2.215941905975342, - "learning_rate": 3.0759030393040896e-05, - "loss": 0.6724, - "step": 130590 - }, - { - "epoch": 1.1545465796778585, - "grad_norm": 1.5650781393051147, - "learning_rate": 3.0757557005369024e-05, - "loss": 0.6223, - "step": 130600 - }, - { - "epoch": 1.1546349829381708, - "grad_norm": 2.669823169708252, - "learning_rate": 3.075608361769716e-05, - "loss": 0.5961, - "step": 130610 - }, - { - "epoch": 1.154723386198483, - "grad_norm": 3.119622230529785, - "learning_rate": 3.075461023002528e-05, - "loss": 0.6584, - "step": 130620 - }, - { - "epoch": 1.1548117894587953, - "grad_norm": 3.438450574874878, - "learning_rate": 3.0753136842353416e-05, - "loss": 0.506, - "step": 130630 - }, - { - "epoch": 1.1549001927191074, - "grad_norm": 2.9536571502685547, - "learning_rate": 3.0751663454681544e-05, - "loss": 0.6153, - "step": 130640 - }, - { - "epoch": 1.1549885959794197, - "grad_norm": 13.768678665161133, - "learning_rate": 3.075019006700967e-05, - "loss": 0.6068, - "step": 130650 - }, - { - "epoch": 1.155076999239732, - "grad_norm": 5.026328086853027, - "learning_rate": 3.07487166793378e-05, - "loss": 0.7131, - "step": 130660 - }, - { - "epoch": 1.1551654025000442, - "grad_norm": 5.939401626586914, - "learning_rate": 3.0747243291665936e-05, - "loss": 0.6545, - "step": 130670 - }, - { - "epoch": 1.1552538057603565, - "grad_norm": 0.8401268720626831, - "learning_rate": 3.074576990399406e-05, - "loss": 0.4732, - "step": 130680 - }, - { - "epoch": 1.1553422090206686, - "grad_norm": 3.457369327545166, - "learning_rate": 3.074429651632219e-05, - "loss": 0.6863, - "step": 130690 - }, - { - "epoch": 1.155430612280981, - "grad_norm": 9.696551322937012, - "learning_rate": 3.074282312865032e-05, - "loss": 0.7699, - "step": 130700 - }, - { - "epoch": 1.155519015541293, - "grad_norm": 23.009403228759766, - "learning_rate": 3.074134974097845e-05, - "loss": 0.6011, - "step": 130710 - }, - { - "epoch": 1.1556074188016054, - "grad_norm": 8.823689460754395, - "learning_rate": 3.073987635330658e-05, - "loss": 0.5357, - "step": 130720 - }, - { - "epoch": 1.1556958220619176, - "grad_norm": 2.050417900085449, - "learning_rate": 3.0738402965634706e-05, - "loss": 0.6402, - "step": 130730 - }, - { - "epoch": 1.15578422532223, - "grad_norm": 5.882505416870117, - "learning_rate": 3.0736929577962835e-05, - "loss": 0.761, - "step": 130740 - }, - { - "epoch": 1.155872628582542, - "grad_norm": 2.0210752487182617, - "learning_rate": 3.073545619029097e-05, - "loss": 0.5858, - "step": 130750 - }, - { - "epoch": 1.1559610318428544, - "grad_norm": 2.07631254196167, - "learning_rate": 3.073398280261909e-05, - "loss": 0.679, - "step": 130760 - }, - { - "epoch": 1.1560494351031667, - "grad_norm": 1.1941617727279663, - "learning_rate": 3.0732509414947227e-05, - "loss": 0.6533, - "step": 130770 - }, - { - "epoch": 1.1561378383634788, - "grad_norm": 1.3758389949798584, - "learning_rate": 3.0731036027275355e-05, - "loss": 0.624, - "step": 130780 - }, - { - "epoch": 1.1562262416237912, - "grad_norm": 5.725162506103516, - "learning_rate": 3.072956263960348e-05, - "loss": 0.6447, - "step": 130790 - }, - { - "epoch": 1.1563146448841033, - "grad_norm": 3.19097638130188, - "learning_rate": 3.072808925193161e-05, - "loss": 0.6202, - "step": 130800 - }, - { - "epoch": 1.1564030481444156, - "grad_norm": 1.3096617460250854, - "learning_rate": 3.072661586425975e-05, - "loss": 0.6101, - "step": 130810 - }, - { - "epoch": 1.1564914514047278, - "grad_norm": 10.097740173339844, - "learning_rate": 3.072514247658787e-05, - "loss": 0.4957, - "step": 130820 - }, - { - "epoch": 1.15657985466504, - "grad_norm": 0.9915809035301208, - "learning_rate": 3.0723669088916003e-05, - "loss": 0.6326, - "step": 130830 - }, - { - "epoch": 1.1566682579253522, - "grad_norm": 7.710951805114746, - "learning_rate": 3.0722195701244125e-05, - "loss": 0.6739, - "step": 130840 - }, - { - "epoch": 1.1567566611856646, - "grad_norm": 2.461761713027954, - "learning_rate": 3.072072231357226e-05, - "loss": 0.5882, - "step": 130850 - }, - { - "epoch": 1.1568450644459767, - "grad_norm": 21.788585662841797, - "learning_rate": 3.071924892590039e-05, - "loss": 0.5491, - "step": 130860 - }, - { - "epoch": 1.156933467706289, - "grad_norm": 2.5861144065856934, - "learning_rate": 3.071777553822852e-05, - "loss": 0.6463, - "step": 130870 - }, - { - "epoch": 1.1570218709666014, - "grad_norm": 1.5807311534881592, - "learning_rate": 3.0716302150556645e-05, - "loss": 0.6314, - "step": 130880 - }, - { - "epoch": 1.1571102742269135, - "grad_norm": 4.687560081481934, - "learning_rate": 3.071482876288478e-05, - "loss": 0.6703, - "step": 130890 - }, - { - "epoch": 1.1571986774872258, - "grad_norm": 2.799945592880249, - "learning_rate": 3.07133553752129e-05, - "loss": 0.6551, - "step": 130900 - }, - { - "epoch": 1.157287080747538, - "grad_norm": 5.429865837097168, - "learning_rate": 3.071188198754104e-05, - "loss": 0.6678, - "step": 130910 - }, - { - "epoch": 1.1573754840078503, - "grad_norm": 2.5880908966064453, - "learning_rate": 3.0710408599869165e-05, - "loss": 0.4931, - "step": 130920 - }, - { - "epoch": 1.1574638872681624, - "grad_norm": 7.995789051055908, - "learning_rate": 3.0708935212197294e-05, - "loss": 0.6173, - "step": 130930 - }, - { - "epoch": 1.1575522905284747, - "grad_norm": 5.808766841888428, - "learning_rate": 3.070746182452542e-05, - "loss": 0.6576, - "step": 130940 - }, - { - "epoch": 1.1576406937887869, - "grad_norm": 5.3204851150512695, - "learning_rate": 3.070598843685355e-05, - "loss": 0.6186, - "step": 130950 - }, - { - "epoch": 1.1577290970490992, - "grad_norm": 8.692299842834473, - "learning_rate": 3.070451504918168e-05, - "loss": 0.6071, - "step": 130960 - }, - { - "epoch": 1.1578175003094113, - "grad_norm": 2.939260721206665, - "learning_rate": 3.0703041661509814e-05, - "loss": 0.7696, - "step": 130970 - }, - { - "epoch": 1.1579059035697237, - "grad_norm": 2.478036642074585, - "learning_rate": 3.0701568273837935e-05, - "loss": 0.5836, - "step": 130980 - }, - { - "epoch": 1.157994306830036, - "grad_norm": 2.0853664875030518, - "learning_rate": 3.070009488616607e-05, - "loss": 0.7075, - "step": 130990 - }, - { - "epoch": 1.1580827100903481, - "grad_norm": 1.2412909269332886, - "learning_rate": 3.06986214984942e-05, - "loss": 0.6712, - "step": 131000 - }, - { - "epoch": 1.1581711133506603, - "grad_norm": 1.3032063245773315, - "learning_rate": 3.069714811082233e-05, - "loss": 0.5988, - "step": 131010 - }, - { - "epoch": 1.1582595166109726, - "grad_norm": 2.303032875061035, - "learning_rate": 3.0695674723150456e-05, - "loss": 0.5472, - "step": 131020 - }, - { - "epoch": 1.158347919871285, - "grad_norm": 5.118271350860596, - "learning_rate": 3.069420133547859e-05, - "loss": 0.5835, - "step": 131030 - }, - { - "epoch": 1.158436323131597, - "grad_norm": 2.0873095989227295, - "learning_rate": 3.069272794780671e-05, - "loss": 0.5617, - "step": 131040 - }, - { - "epoch": 1.1585247263919094, - "grad_norm": 1.1297153234481812, - "learning_rate": 3.069125456013485e-05, - "loss": 0.4775, - "step": 131050 - }, - { - "epoch": 1.1586131296522215, - "grad_norm": 2.575432777404785, - "learning_rate": 3.068978117246297e-05, - "loss": 0.6477, - "step": 131060 - }, - { - "epoch": 1.1587015329125339, - "grad_norm": 2.042219877243042, - "learning_rate": 3.0688307784791104e-05, - "loss": 0.6684, - "step": 131070 - }, - { - "epoch": 1.158789936172846, - "grad_norm": 2.1652231216430664, - "learning_rate": 3.068683439711923e-05, - "loss": 0.6125, - "step": 131080 - }, - { - "epoch": 1.1588783394331583, - "grad_norm": 1.0913035869598389, - "learning_rate": 3.068536100944736e-05, - "loss": 0.5844, - "step": 131090 - }, - { - "epoch": 1.1589667426934704, - "grad_norm": 2.7634623050689697, - "learning_rate": 3.068388762177549e-05, - "loss": 0.5514, - "step": 131100 - }, - { - "epoch": 1.1590551459537828, - "grad_norm": 1.49564790725708, - "learning_rate": 3.0682414234103624e-05, - "loss": 0.5283, - "step": 131110 - }, - { - "epoch": 1.159143549214095, - "grad_norm": 3.3530147075653076, - "learning_rate": 3.0680940846431746e-05, - "loss": 0.6858, - "step": 131120 - }, - { - "epoch": 1.1592319524744072, - "grad_norm": 2.3691318035125732, - "learning_rate": 3.067946745875988e-05, - "loss": 0.6402, - "step": 131130 - }, - { - "epoch": 1.1593203557347196, - "grad_norm": 4.105404376983643, - "learning_rate": 3.067799407108801e-05, - "loss": 0.6941, - "step": 131140 - }, - { - "epoch": 1.1594087589950317, - "grad_norm": 2.8768675327301025, - "learning_rate": 3.067652068341614e-05, - "loss": 0.7107, - "step": 131150 - }, - { - "epoch": 1.159497162255344, - "grad_norm": 5.293215751647949, - "learning_rate": 3.0675047295744266e-05, - "loss": 0.6305, - "step": 131160 - }, - { - "epoch": 1.1595855655156562, - "grad_norm": 2.359454393386841, - "learning_rate": 3.06735739080724e-05, - "loss": 0.6586, - "step": 131170 - }, - { - "epoch": 1.1596739687759685, - "grad_norm": 2.1791038513183594, - "learning_rate": 3.067210052040052e-05, - "loss": 0.6774, - "step": 131180 - }, - { - "epoch": 1.1597623720362806, - "grad_norm": 1.4351047277450562, - "learning_rate": 3.067062713272866e-05, - "loss": 0.615, - "step": 131190 - }, - { - "epoch": 1.159850775296593, - "grad_norm": 7.623877048492432, - "learning_rate": 3.0669153745056786e-05, - "loss": 0.549, - "step": 131200 - }, - { - "epoch": 1.159939178556905, - "grad_norm": 2.908419609069824, - "learning_rate": 3.0667680357384915e-05, - "loss": 0.4899, - "step": 131210 - }, - { - "epoch": 1.1600275818172174, - "grad_norm": 2.050945520401001, - "learning_rate": 3.066620696971304e-05, - "loss": 0.6592, - "step": 131220 - }, - { - "epoch": 1.1601159850775296, - "grad_norm": 1.3170599937438965, - "learning_rate": 3.066473358204117e-05, - "loss": 0.559, - "step": 131230 - }, - { - "epoch": 1.160204388337842, - "grad_norm": 6.787662029266357, - "learning_rate": 3.0663260194369307e-05, - "loss": 0.5855, - "step": 131240 - }, - { - "epoch": 1.1602927915981542, - "grad_norm": 1.2695868015289307, - "learning_rate": 3.0661786806697435e-05, - "loss": 0.5806, - "step": 131250 - }, - { - "epoch": 1.1603811948584664, - "grad_norm": 1.4428120851516724, - "learning_rate": 3.066031341902556e-05, - "loss": 0.5773, - "step": 131260 - }, - { - "epoch": 1.1604695981187787, - "grad_norm": 4.806600570678711, - "learning_rate": 3.065884003135369e-05, - "loss": 0.5664, - "step": 131270 - }, - { - "epoch": 1.1605580013790908, - "grad_norm": 12.411433219909668, - "learning_rate": 3.065736664368182e-05, - "loss": 0.6255, - "step": 131280 - }, - { - "epoch": 1.1606464046394032, - "grad_norm": 1.616111159324646, - "learning_rate": 3.065589325600995e-05, - "loss": 0.6148, - "step": 131290 - }, - { - "epoch": 1.1607348078997153, - "grad_norm": 1.4758148193359375, - "learning_rate": 3.0654419868338083e-05, - "loss": 0.5684, - "step": 131300 - }, - { - "epoch": 1.1608232111600276, - "grad_norm": 1.1272330284118652, - "learning_rate": 3.0652946480666205e-05, - "loss": 0.6464, - "step": 131310 - }, - { - "epoch": 1.1609116144203397, - "grad_norm": 4.339254379272461, - "learning_rate": 3.065147309299434e-05, - "loss": 0.6755, - "step": 131320 - }, - { - "epoch": 1.161000017680652, - "grad_norm": 3.4238412380218506, - "learning_rate": 3.064999970532247e-05, - "loss": 0.4863, - "step": 131330 - }, - { - "epoch": 1.1610884209409642, - "grad_norm": 2.6474673748016357, - "learning_rate": 3.06485263176506e-05, - "loss": 0.5196, - "step": 131340 - }, - { - "epoch": 1.1611768242012765, - "grad_norm": 1.2571967840194702, - "learning_rate": 3.0647052929978725e-05, - "loss": 0.6095, - "step": 131350 - }, - { - "epoch": 1.161265227461589, - "grad_norm": 8.376547813415527, - "learning_rate": 3.064557954230686e-05, - "loss": 0.576, - "step": 131360 - }, - { - "epoch": 1.161353630721901, - "grad_norm": 4.1423773765563965, - "learning_rate": 3.064410615463498e-05, - "loss": 0.6226, - "step": 131370 - }, - { - "epoch": 1.1614420339822134, - "grad_norm": 2.3484625816345215, - "learning_rate": 3.064263276696312e-05, - "loss": 0.6793, - "step": 131380 - }, - { - "epoch": 1.1615304372425255, - "grad_norm": 1.7184317111968994, - "learning_rate": 3.0641159379291245e-05, - "loss": 0.5015, - "step": 131390 - }, - { - "epoch": 1.1616188405028378, - "grad_norm": 1.766471028327942, - "learning_rate": 3.0639685991619374e-05, - "loss": 0.6157, - "step": 131400 - }, - { - "epoch": 1.16170724376315, - "grad_norm": 2.537006139755249, - "learning_rate": 3.06382126039475e-05, - "loss": 0.7882, - "step": 131410 - }, - { - "epoch": 1.1617956470234623, - "grad_norm": 3.338129997253418, - "learning_rate": 3.063673921627563e-05, - "loss": 0.6511, - "step": 131420 - }, - { - "epoch": 1.1618840502837744, - "grad_norm": 1.5497372150421143, - "learning_rate": 3.063526582860376e-05, - "loss": 0.5561, - "step": 131430 - }, - { - "epoch": 1.1619724535440867, - "grad_norm": 4.003422737121582, - "learning_rate": 3.0633792440931894e-05, - "loss": 0.601, - "step": 131440 - }, - { - "epoch": 1.1620608568043989, - "grad_norm": 1.6146740913391113, - "learning_rate": 3.0632319053260016e-05, - "loss": 0.6574, - "step": 131450 - }, - { - "epoch": 1.1621492600647112, - "grad_norm": 1.4565777778625488, - "learning_rate": 3.063084566558815e-05, - "loss": 0.6761, - "step": 131460 - }, - { - "epoch": 1.1622376633250235, - "grad_norm": 1.5680956840515137, - "learning_rate": 3.062937227791628e-05, - "loss": 0.5231, - "step": 131470 - }, - { - "epoch": 1.1623260665853357, - "grad_norm": 1.0105998516082764, - "learning_rate": 3.062789889024441e-05, - "loss": 0.6894, - "step": 131480 - }, - { - "epoch": 1.162414469845648, - "grad_norm": 1.1560819149017334, - "learning_rate": 3.0626425502572536e-05, - "loss": 0.7437, - "step": 131490 - }, - { - "epoch": 1.1625028731059601, - "grad_norm": 2.643648147583008, - "learning_rate": 3.062495211490067e-05, - "loss": 0.6468, - "step": 131500 - }, - { - "epoch": 1.1625912763662725, - "grad_norm": 3.2028660774230957, - "learning_rate": 3.062347872722879e-05, - "loss": 0.6121, - "step": 131510 - }, - { - "epoch": 1.1626796796265846, - "grad_norm": 2.450869083404541, - "learning_rate": 3.062200533955693e-05, - "loss": 0.7223, - "step": 131520 - }, - { - "epoch": 1.162768082886897, - "grad_norm": 0.9343979954719543, - "learning_rate": 3.0620531951885056e-05, - "loss": 0.6489, - "step": 131530 - }, - { - "epoch": 1.162856486147209, - "grad_norm": 0.8441886305809021, - "learning_rate": 3.0619058564213184e-05, - "loss": 0.5035, - "step": 131540 - }, - { - "epoch": 1.1629448894075214, - "grad_norm": 6.100572109222412, - "learning_rate": 3.061758517654131e-05, - "loss": 0.5692, - "step": 131550 - }, - { - "epoch": 1.1630332926678335, - "grad_norm": 5.454118728637695, - "learning_rate": 3.061611178886944e-05, - "loss": 0.5874, - "step": 131560 - }, - { - "epoch": 1.1631216959281458, - "grad_norm": 1.5155504941940308, - "learning_rate": 3.061463840119757e-05, - "loss": 0.6349, - "step": 131570 - }, - { - "epoch": 1.1632100991884582, - "grad_norm": 7.628772258758545, - "learning_rate": 3.0613165013525704e-05, - "loss": 0.5355, - "step": 131580 - }, - { - "epoch": 1.1632985024487703, - "grad_norm": 0.8436858654022217, - "learning_rate": 3.0611691625853826e-05, - "loss": 0.5978, - "step": 131590 - }, - { - "epoch": 1.1633869057090824, - "grad_norm": 1.6208841800689697, - "learning_rate": 3.061021823818196e-05, - "loss": 0.598, - "step": 131600 - }, - { - "epoch": 1.1634753089693948, - "grad_norm": 1.880710482597351, - "learning_rate": 3.060874485051009e-05, - "loss": 0.6345, - "step": 131610 - }, - { - "epoch": 1.1635637122297071, - "grad_norm": 2.420870780944824, - "learning_rate": 3.060727146283822e-05, - "loss": 0.5964, - "step": 131620 - }, - { - "epoch": 1.1636521154900192, - "grad_norm": 5.38777494430542, - "learning_rate": 3.0605798075166346e-05, - "loss": 0.6127, - "step": 131630 - }, - { - "epoch": 1.1637405187503316, - "grad_norm": 1.035424828529358, - "learning_rate": 3.060432468749448e-05, - "loss": 0.6733, - "step": 131640 - }, - { - "epoch": 1.1638289220106437, - "grad_norm": 2.5046041011810303, - "learning_rate": 3.06028512998226e-05, - "loss": 0.5019, - "step": 131650 - }, - { - "epoch": 1.163917325270956, - "grad_norm": 1.4642952680587769, - "learning_rate": 3.060137791215074e-05, - "loss": 0.7098, - "step": 131660 - }, - { - "epoch": 1.1640057285312682, - "grad_norm": 3.817128896713257, - "learning_rate": 3.059990452447886e-05, - "loss": 0.6361, - "step": 131670 - }, - { - "epoch": 1.1640941317915805, - "grad_norm": 3.3395297527313232, - "learning_rate": 3.0598431136806995e-05, - "loss": 0.7037, - "step": 131680 - }, - { - "epoch": 1.1641825350518926, - "grad_norm": 0.9705575108528137, - "learning_rate": 3.059695774913512e-05, - "loss": 0.7348, - "step": 131690 - }, - { - "epoch": 1.164270938312205, - "grad_norm": 1.3147315979003906, - "learning_rate": 3.059548436146325e-05, - "loss": 0.6297, - "step": 131700 - }, - { - "epoch": 1.164359341572517, - "grad_norm": 3.56866192817688, - "learning_rate": 3.059401097379138e-05, - "loss": 0.5257, - "step": 131710 - }, - { - "epoch": 1.1644477448328294, - "grad_norm": 1.7186813354492188, - "learning_rate": 3.0592537586119515e-05, - "loss": 0.6567, - "step": 131720 - }, - { - "epoch": 1.1645361480931418, - "grad_norm": 3.382847547531128, - "learning_rate": 3.0591064198447637e-05, - "loss": 0.6167, - "step": 131730 - }, - { - "epoch": 1.1646245513534539, - "grad_norm": 5.220097064971924, - "learning_rate": 3.058959081077577e-05, - "loss": 0.4954, - "step": 131740 - }, - { - "epoch": 1.1647129546137662, - "grad_norm": 8.415491104125977, - "learning_rate": 3.05881174231039e-05, - "loss": 0.6588, - "step": 131750 - }, - { - "epoch": 1.1648013578740783, - "grad_norm": 2.3298912048339844, - "learning_rate": 3.058664403543203e-05, - "loss": 0.556, - "step": 131760 - }, - { - "epoch": 1.1648897611343907, - "grad_norm": 1.4740550518035889, - "learning_rate": 3.058517064776016e-05, - "loss": 0.6669, - "step": 131770 - }, - { - "epoch": 1.1649781643947028, - "grad_norm": 3.5775668621063232, - "learning_rate": 3.0583697260088285e-05, - "loss": 0.6344, - "step": 131780 - }, - { - "epoch": 1.1650665676550152, - "grad_norm": 2.814379930496216, - "learning_rate": 3.0582223872416413e-05, - "loss": 0.6525, - "step": 131790 - }, - { - "epoch": 1.1651549709153273, - "grad_norm": 2.768153429031372, - "learning_rate": 3.058075048474455e-05, - "loss": 0.648, - "step": 131800 - }, - { - "epoch": 1.1652433741756396, - "grad_norm": 8.114901542663574, - "learning_rate": 3.057927709707267e-05, - "loss": 0.5869, - "step": 131810 - }, - { - "epoch": 1.1653317774359517, - "grad_norm": 2.019869804382324, - "learning_rate": 3.0577803709400805e-05, - "loss": 0.6177, - "step": 131820 - }, - { - "epoch": 1.165420180696264, - "grad_norm": 0.9405084252357483, - "learning_rate": 3.0576330321728934e-05, - "loss": 0.5722, - "step": 131830 - }, - { - "epoch": 1.1655085839565764, - "grad_norm": 2.8537769317626953, - "learning_rate": 3.057485693405706e-05, - "loss": 0.5964, - "step": 131840 - }, - { - "epoch": 1.1655969872168885, - "grad_norm": 3.6890366077423096, - "learning_rate": 3.057338354638519e-05, - "loss": 0.7653, - "step": 131850 - }, - { - "epoch": 1.1656853904772009, - "grad_norm": 1.1687262058258057, - "learning_rate": 3.0571910158713326e-05, - "loss": 0.7162, - "step": 131860 - }, - { - "epoch": 1.165773793737513, - "grad_norm": 2.0385937690734863, - "learning_rate": 3.057043677104145e-05, - "loss": 0.6421, - "step": 131870 - }, - { - "epoch": 1.1658621969978253, - "grad_norm": 5.374686241149902, - "learning_rate": 3.056896338336958e-05, - "loss": 0.5068, - "step": 131880 - }, - { - "epoch": 1.1659506002581375, - "grad_norm": 1.3698152303695679, - "learning_rate": 3.0567489995697704e-05, - "loss": 0.6813, - "step": 131890 - }, - { - "epoch": 1.1660390035184498, - "grad_norm": 7.347153663635254, - "learning_rate": 3.056601660802584e-05, - "loss": 0.6302, - "step": 131900 - }, - { - "epoch": 1.166127406778762, - "grad_norm": 26.705734252929688, - "learning_rate": 3.056454322035397e-05, - "loss": 0.6034, - "step": 131910 - }, - { - "epoch": 1.1662158100390743, - "grad_norm": 1.0941476821899414, - "learning_rate": 3.0563069832682096e-05, - "loss": 0.5443, - "step": 131920 - }, - { - "epoch": 1.1663042132993864, - "grad_norm": 14.015328407287598, - "learning_rate": 3.0561596445010224e-05, - "loss": 0.5796, - "step": 131930 - }, - { - "epoch": 1.1663926165596987, - "grad_norm": 2.918259859085083, - "learning_rate": 3.056012305733836e-05, - "loss": 0.6019, - "step": 131940 - }, - { - "epoch": 1.166481019820011, - "grad_norm": 2.0471794605255127, - "learning_rate": 3.055864966966648e-05, - "loss": 0.7929, - "step": 131950 - }, - { - "epoch": 1.1665694230803232, - "grad_norm": 6.047561168670654, - "learning_rate": 3.0557176281994616e-05, - "loss": 0.5976, - "step": 131960 - }, - { - "epoch": 1.1666578263406355, - "grad_norm": 1.3194369077682495, - "learning_rate": 3.0555702894322744e-05, - "loss": 0.4825, - "step": 131970 - }, - { - "epoch": 1.1667462296009476, - "grad_norm": 2.630715847015381, - "learning_rate": 3.055422950665087e-05, - "loss": 0.5792, - "step": 131980 - }, - { - "epoch": 1.16683463286126, - "grad_norm": 1.671630620956421, - "learning_rate": 3.0552756118979e-05, - "loss": 0.6355, - "step": 131990 - }, - { - "epoch": 1.166923036121572, - "grad_norm": 1.4064486026763916, - "learning_rate": 3.0551282731307136e-05, - "loss": 0.554, - "step": 132000 - }, - { - "epoch": 1.1670114393818845, - "grad_norm": 1.7668626308441162, - "learning_rate": 3.054980934363526e-05, - "loss": 0.6176, - "step": 132010 - }, - { - "epoch": 1.1670998426421966, - "grad_norm": 6.646592140197754, - "learning_rate": 3.054833595596339e-05, - "loss": 0.6725, - "step": 132020 - }, - { - "epoch": 1.167188245902509, - "grad_norm": 15.09215259552002, - "learning_rate": 3.054686256829152e-05, - "loss": 0.5482, - "step": 132030 - }, - { - "epoch": 1.167276649162821, - "grad_norm": 3.1803817749023438, - "learning_rate": 3.054538918061965e-05, - "loss": 0.7204, - "step": 132040 - }, - { - "epoch": 1.1673650524231334, - "grad_norm": 5.396078586578369, - "learning_rate": 3.054391579294778e-05, - "loss": 0.5977, - "step": 132050 - }, - { - "epoch": 1.1674534556834457, - "grad_norm": 4.771073818206787, - "learning_rate": 3.0542442405275906e-05, - "loss": 0.7139, - "step": 132060 - }, - { - "epoch": 1.1675418589437578, - "grad_norm": 13.232796669006348, - "learning_rate": 3.0540969017604035e-05, - "loss": 0.6498, - "step": 132070 - }, - { - "epoch": 1.1676302622040702, - "grad_norm": 1.5892701148986816, - "learning_rate": 3.053949562993217e-05, - "loss": 0.5872, - "step": 132080 - }, - { - "epoch": 1.1677186654643823, - "grad_norm": 2.342539072036743, - "learning_rate": 3.05380222422603e-05, - "loss": 0.6666, - "step": 132090 - }, - { - "epoch": 1.1678070687246946, - "grad_norm": 2.24224853515625, - "learning_rate": 3.0536548854588426e-05, - "loss": 0.6869, - "step": 132100 - }, - { - "epoch": 1.1678954719850068, - "grad_norm": 3.398423194885254, - "learning_rate": 3.0535075466916555e-05, - "loss": 0.6473, - "step": 132110 - }, - { - "epoch": 1.167983875245319, - "grad_norm": 1.142639398574829, - "learning_rate": 3.053360207924468e-05, - "loss": 0.5882, - "step": 132120 - }, - { - "epoch": 1.1680722785056312, - "grad_norm": 4.320949554443359, - "learning_rate": 3.053212869157281e-05, - "loss": 0.5157, - "step": 132130 - }, - { - "epoch": 1.1681606817659436, - "grad_norm": 2.6523942947387695, - "learning_rate": 3.053065530390094e-05, - "loss": 0.728, - "step": 132140 - }, - { - "epoch": 1.1682490850262557, - "grad_norm": 9.76197624206543, - "learning_rate": 3.0529181916229075e-05, - "loss": 0.5348, - "step": 132150 - }, - { - "epoch": 1.168337488286568, - "grad_norm": 2.3899006843566895, - "learning_rate": 3.05277085285572e-05, - "loss": 0.6289, - "step": 132160 - }, - { - "epoch": 1.1684258915468804, - "grad_norm": 4.8326263427734375, - "learning_rate": 3.052623514088533e-05, - "loss": 0.5918, - "step": 132170 - }, - { - "epoch": 1.1685142948071925, - "grad_norm": 1.922325611114502, - "learning_rate": 3.052476175321346e-05, - "loss": 0.56, - "step": 132180 - }, - { - "epoch": 1.1686026980675046, - "grad_norm": 1.5457454919815063, - "learning_rate": 3.052328836554159e-05, - "loss": 0.5192, - "step": 132190 - }, - { - "epoch": 1.168691101327817, - "grad_norm": 2.1275956630706787, - "learning_rate": 3.052181497786972e-05, - "loss": 0.7522, - "step": 132200 - }, - { - "epoch": 1.1687795045881293, - "grad_norm": 0.6837577223777771, - "learning_rate": 3.052034159019785e-05, - "loss": 0.6392, - "step": 132210 - }, - { - "epoch": 1.1688679078484414, - "grad_norm": 3.106041669845581, - "learning_rate": 3.051886820252598e-05, - "loss": 0.6022, - "step": 132220 - }, - { - "epoch": 1.1689563111087538, - "grad_norm": 1.675716757774353, - "learning_rate": 3.0517394814854105e-05, - "loss": 0.5052, - "step": 132230 - }, - { - "epoch": 1.1690447143690659, - "grad_norm": 1.3169463872909546, - "learning_rate": 3.0515921427182237e-05, - "loss": 0.6393, - "step": 132240 - }, - { - "epoch": 1.1691331176293782, - "grad_norm": 3.624569892883301, - "learning_rate": 3.0514448039510362e-05, - "loss": 0.7519, - "step": 132250 - }, - { - "epoch": 1.1692215208896903, - "grad_norm": 1.2276462316513062, - "learning_rate": 3.0512974651838494e-05, - "loss": 0.4475, - "step": 132260 - }, - { - "epoch": 1.1693099241500027, - "grad_norm": 4.70722770690918, - "learning_rate": 3.0511501264166625e-05, - "loss": 0.5525, - "step": 132270 - }, - { - "epoch": 1.1693983274103148, - "grad_norm": 2.2845730781555176, - "learning_rate": 3.051002787649475e-05, - "loss": 0.6448, - "step": 132280 - }, - { - "epoch": 1.1694867306706271, - "grad_norm": 1.9513633251190186, - "learning_rate": 3.0508554488822882e-05, - "loss": 0.6228, - "step": 132290 - }, - { - "epoch": 1.1695751339309393, - "grad_norm": 5.279168128967285, - "learning_rate": 3.0507081101151014e-05, - "loss": 0.6417, - "step": 132300 - }, - { - "epoch": 1.1696635371912516, - "grad_norm": 2.700528860092163, - "learning_rate": 3.050560771347914e-05, - "loss": 0.6448, - "step": 132310 - }, - { - "epoch": 1.169751940451564, - "grad_norm": 6.037847518920898, - "learning_rate": 3.050413432580727e-05, - "loss": 0.659, - "step": 132320 - }, - { - "epoch": 1.169840343711876, - "grad_norm": 2.064674139022827, - "learning_rate": 3.0502660938135402e-05, - "loss": 0.6646, - "step": 132330 - }, - { - "epoch": 1.1699287469721884, - "grad_norm": 1.5199215412139893, - "learning_rate": 3.0501187550463527e-05, - "loss": 0.6334, - "step": 132340 - }, - { - "epoch": 1.1700171502325005, - "grad_norm": 1.3490065336227417, - "learning_rate": 3.049971416279166e-05, - "loss": 0.5442, - "step": 132350 - }, - { - "epoch": 1.1701055534928129, - "grad_norm": 3.5852301120758057, - "learning_rate": 3.0498240775119784e-05, - "loss": 0.6561, - "step": 132360 - }, - { - "epoch": 1.170193956753125, - "grad_norm": 2.237496852874756, - "learning_rate": 3.0496767387447916e-05, - "loss": 0.5177, - "step": 132370 - }, - { - "epoch": 1.1702823600134373, - "grad_norm": 7.085289001464844, - "learning_rate": 3.0495293999776047e-05, - "loss": 0.4908, - "step": 132380 - }, - { - "epoch": 1.1703707632737494, - "grad_norm": 3.303255081176758, - "learning_rate": 3.0493820612104172e-05, - "loss": 0.5905, - "step": 132390 - }, - { - "epoch": 1.1704591665340618, - "grad_norm": 3.2829606533050537, - "learning_rate": 3.0492347224432304e-05, - "loss": 0.6725, - "step": 132400 - }, - { - "epoch": 1.170547569794374, - "grad_norm": 2.7317938804626465, - "learning_rate": 3.0490873836760436e-05, - "loss": 0.6341, - "step": 132410 - }, - { - "epoch": 1.1706359730546863, - "grad_norm": 1.486572504043579, - "learning_rate": 3.048940044908856e-05, - "loss": 0.4655, - "step": 132420 - }, - { - "epoch": 1.1707243763149986, - "grad_norm": 2.321272373199463, - "learning_rate": 3.0487927061416693e-05, - "loss": 0.737, - "step": 132430 - }, - { - "epoch": 1.1708127795753107, - "grad_norm": 2.659299373626709, - "learning_rate": 3.0486453673744824e-05, - "loss": 0.638, - "step": 132440 - }, - { - "epoch": 1.170901182835623, - "grad_norm": 6.0426106452941895, - "learning_rate": 3.048498028607295e-05, - "loss": 0.601, - "step": 132450 - }, - { - "epoch": 1.1709895860959352, - "grad_norm": 1.1585391759872437, - "learning_rate": 3.048350689840108e-05, - "loss": 0.5628, - "step": 132460 - }, - { - "epoch": 1.1710779893562475, - "grad_norm": 1.6140022277832031, - "learning_rate": 3.0482033510729213e-05, - "loss": 0.5131, - "step": 132470 - }, - { - "epoch": 1.1711663926165596, - "grad_norm": 1.9316903352737427, - "learning_rate": 3.0480560123057338e-05, - "loss": 0.6078, - "step": 132480 - }, - { - "epoch": 1.171254795876872, - "grad_norm": 5.598964691162109, - "learning_rate": 3.047908673538547e-05, - "loss": 0.7338, - "step": 132490 - }, - { - "epoch": 1.171343199137184, - "grad_norm": 2.369739055633545, - "learning_rate": 3.0477613347713598e-05, - "loss": 0.6196, - "step": 132500 - }, - { - "epoch": 1.1714316023974964, - "grad_norm": 14.49877643585205, - "learning_rate": 3.0476139960041726e-05, - "loss": 0.7379, - "step": 132510 - }, - { - "epoch": 1.1715200056578086, - "grad_norm": 1.8578732013702393, - "learning_rate": 3.0474666572369858e-05, - "loss": 0.5704, - "step": 132520 - }, - { - "epoch": 1.171608408918121, - "grad_norm": 2.9169743061065674, - "learning_rate": 3.0473193184697986e-05, - "loss": 0.6851, - "step": 132530 - }, - { - "epoch": 1.1716968121784332, - "grad_norm": 1.8212313652038574, - "learning_rate": 3.0471719797026115e-05, - "loss": 0.5748, - "step": 132540 - }, - { - "epoch": 1.1717852154387454, - "grad_norm": 6.714269161224365, - "learning_rate": 3.0470246409354246e-05, - "loss": 0.5968, - "step": 132550 - }, - { - "epoch": 1.1718736186990577, - "grad_norm": 0.8977352380752563, - "learning_rate": 3.0468773021682375e-05, - "loss": 0.5727, - "step": 132560 - }, - { - "epoch": 1.1719620219593698, - "grad_norm": 4.852736949920654, - "learning_rate": 3.0467299634010503e-05, - "loss": 0.5725, - "step": 132570 - }, - { - "epoch": 1.1720504252196822, - "grad_norm": 1.6195685863494873, - "learning_rate": 3.0465826246338635e-05, - "loss": 0.5958, - "step": 132580 - }, - { - "epoch": 1.1721388284799943, - "grad_norm": 2.600242853164673, - "learning_rate": 3.0464352858666763e-05, - "loss": 0.6953, - "step": 132590 - }, - { - "epoch": 1.1722272317403066, - "grad_norm": 1.6285310983657837, - "learning_rate": 3.046287947099489e-05, - "loss": 0.6017, - "step": 132600 - }, - { - "epoch": 1.1723156350006187, - "grad_norm": 16.744966506958008, - "learning_rate": 3.046140608332302e-05, - "loss": 0.7311, - "step": 132610 - }, - { - "epoch": 1.172404038260931, - "grad_norm": 2.718080997467041, - "learning_rate": 3.045993269565115e-05, - "loss": 0.7, - "step": 132620 - }, - { - "epoch": 1.1724924415212432, - "grad_norm": 5.0117998123168945, - "learning_rate": 3.045845930797928e-05, - "loss": 0.5889, - "step": 132630 - }, - { - "epoch": 1.1725808447815556, - "grad_norm": 3.1515860557556152, - "learning_rate": 3.0456985920307408e-05, - "loss": 0.5895, - "step": 132640 - }, - { - "epoch": 1.172669248041868, - "grad_norm": 2.0903701782226562, - "learning_rate": 3.045551253263554e-05, - "loss": 0.5522, - "step": 132650 - }, - { - "epoch": 1.17275765130218, - "grad_norm": 7.477038383483887, - "learning_rate": 3.045403914496367e-05, - "loss": 0.715, - "step": 132660 - }, - { - "epoch": 1.1728460545624924, - "grad_norm": 1.654893159866333, - "learning_rate": 3.0452565757291797e-05, - "loss": 0.5086, - "step": 132670 - }, - { - "epoch": 1.1729344578228045, - "grad_norm": 2.200843095779419, - "learning_rate": 3.045109236961993e-05, - "loss": 0.5562, - "step": 132680 - }, - { - "epoch": 1.1730228610831168, - "grad_norm": 4.3543009757995605, - "learning_rate": 3.0449618981948057e-05, - "loss": 0.6624, - "step": 132690 - }, - { - "epoch": 1.173111264343429, - "grad_norm": 5.937431812286377, - "learning_rate": 3.0448145594276185e-05, - "loss": 0.7204, - "step": 132700 - }, - { - "epoch": 1.1731996676037413, - "grad_norm": 1.456529140472412, - "learning_rate": 3.0446672206604317e-05, - "loss": 0.5367, - "step": 132710 - }, - { - "epoch": 1.1732880708640534, - "grad_norm": 6.336716175079346, - "learning_rate": 3.0445198818932442e-05, - "loss": 0.5996, - "step": 132720 - }, - { - "epoch": 1.1733764741243657, - "grad_norm": 3.082418918609619, - "learning_rate": 3.0443725431260574e-05, - "loss": 0.6777, - "step": 132730 - }, - { - "epoch": 1.1734648773846779, - "grad_norm": 4.456279277801514, - "learning_rate": 3.0442252043588705e-05, - "loss": 0.6322, - "step": 132740 - }, - { - "epoch": 1.1735532806449902, - "grad_norm": 1.7743403911590576, - "learning_rate": 3.044077865591683e-05, - "loss": 0.5483, - "step": 132750 - }, - { - "epoch": 1.1736416839053025, - "grad_norm": 7.261760234832764, - "learning_rate": 3.0439305268244962e-05, - "loss": 0.614, - "step": 132760 - }, - { - "epoch": 1.1737300871656147, - "grad_norm": 2.1416265964508057, - "learning_rate": 3.0437831880573094e-05, - "loss": 0.4852, - "step": 132770 - }, - { - "epoch": 1.1738184904259268, - "grad_norm": 5.894510746002197, - "learning_rate": 3.043635849290122e-05, - "loss": 0.6076, - "step": 132780 - }, - { - "epoch": 1.1739068936862391, - "grad_norm": 1.7651535272598267, - "learning_rate": 3.043488510522935e-05, - "loss": 0.5231, - "step": 132790 - }, - { - "epoch": 1.1739952969465515, - "grad_norm": 3.848479747772217, - "learning_rate": 3.0433411717557482e-05, - "loss": 0.6342, - "step": 132800 - }, - { - "epoch": 1.1740837002068636, - "grad_norm": 1.8462088108062744, - "learning_rate": 3.0431938329885607e-05, - "loss": 0.5902, - "step": 132810 - }, - { - "epoch": 1.174172103467176, - "grad_norm": 2.1441943645477295, - "learning_rate": 3.043046494221374e-05, - "loss": 0.5651, - "step": 132820 - }, - { - "epoch": 1.174260506727488, - "grad_norm": 1.283730149269104, - "learning_rate": 3.0428991554541864e-05, - "loss": 0.5917, - "step": 132830 - }, - { - "epoch": 1.1743489099878004, - "grad_norm": 3.028813362121582, - "learning_rate": 3.0427518166869996e-05, - "loss": 0.6284, - "step": 132840 - }, - { - "epoch": 1.1744373132481125, - "grad_norm": 10.345865249633789, - "learning_rate": 3.0426044779198127e-05, - "loss": 0.5326, - "step": 132850 - }, - { - "epoch": 1.1745257165084249, - "grad_norm": 16.154863357543945, - "learning_rate": 3.0424571391526252e-05, - "loss": 0.6167, - "step": 132860 - }, - { - "epoch": 1.174614119768737, - "grad_norm": 5.269153594970703, - "learning_rate": 3.0423098003854384e-05, - "loss": 0.5542, - "step": 132870 - }, - { - "epoch": 1.1747025230290493, - "grad_norm": 2.8429760932922363, - "learning_rate": 3.0421624616182516e-05, - "loss": 0.5917, - "step": 132880 - }, - { - "epoch": 1.1747909262893614, - "grad_norm": 3.219909429550171, - "learning_rate": 3.042015122851064e-05, - "loss": 0.7126, - "step": 132890 - }, - { - "epoch": 1.1748793295496738, - "grad_norm": 3.921576499938965, - "learning_rate": 3.0418677840838773e-05, - "loss": 0.6151, - "step": 132900 - }, - { - "epoch": 1.1749677328099861, - "grad_norm": 2.368994951248169, - "learning_rate": 3.0417204453166904e-05, - "loss": 0.6771, - "step": 132910 - }, - { - "epoch": 1.1750561360702982, - "grad_norm": 10.023112297058105, - "learning_rate": 3.041573106549503e-05, - "loss": 0.602, - "step": 132920 - }, - { - "epoch": 1.1751445393306106, - "grad_norm": 7.554394721984863, - "learning_rate": 3.041425767782316e-05, - "loss": 0.5795, - "step": 132930 - }, - { - "epoch": 1.1752329425909227, - "grad_norm": 2.894916296005249, - "learning_rate": 3.0412784290151293e-05, - "loss": 0.6606, - "step": 132940 - }, - { - "epoch": 1.175321345851235, - "grad_norm": 4.400060653686523, - "learning_rate": 3.0411310902479418e-05, - "loss": 0.5947, - "step": 132950 - }, - { - "epoch": 1.1754097491115472, - "grad_norm": 1.7976292371749878, - "learning_rate": 3.040983751480755e-05, - "loss": 0.6628, - "step": 132960 - }, - { - "epoch": 1.1754981523718595, - "grad_norm": 3.646946668624878, - "learning_rate": 3.0408364127135674e-05, - "loss": 0.6524, - "step": 132970 - }, - { - "epoch": 1.1755865556321716, - "grad_norm": 1.9021406173706055, - "learning_rate": 3.0406890739463806e-05, - "loss": 0.4782, - "step": 132980 - }, - { - "epoch": 1.175674958892484, - "grad_norm": 2.6747212409973145, - "learning_rate": 3.0405417351791938e-05, - "loss": 0.7948, - "step": 132990 - }, - { - "epoch": 1.175763362152796, - "grad_norm": 4.680131912231445, - "learning_rate": 3.0403943964120063e-05, - "loss": 0.6384, - "step": 133000 - }, - { - "epoch": 1.1758517654131084, - "grad_norm": 3.6572086811065674, - "learning_rate": 3.0402470576448195e-05, - "loss": 0.6263, - "step": 133010 - }, - { - "epoch": 1.1759401686734208, - "grad_norm": 2.589233160018921, - "learning_rate": 3.0400997188776326e-05, - "loss": 0.5465, - "step": 133020 - }, - { - "epoch": 1.176028571933733, - "grad_norm": 1.347601056098938, - "learning_rate": 3.039952380110445e-05, - "loss": 0.6568, - "step": 133030 - }, - { - "epoch": 1.1761169751940452, - "grad_norm": 2.90478515625, - "learning_rate": 3.0398050413432583e-05, - "loss": 0.6734, - "step": 133040 - }, - { - "epoch": 1.1762053784543574, - "grad_norm": 3.321408748626709, - "learning_rate": 3.0396577025760715e-05, - "loss": 0.7163, - "step": 133050 - }, - { - "epoch": 1.1762937817146697, - "grad_norm": 8.990157127380371, - "learning_rate": 3.039510363808884e-05, - "loss": 0.6033, - "step": 133060 - }, - { - "epoch": 1.1763821849749818, - "grad_norm": 6.883105278015137, - "learning_rate": 3.039363025041697e-05, - "loss": 0.6572, - "step": 133070 - }, - { - "epoch": 1.1764705882352942, - "grad_norm": 6.7309041023254395, - "learning_rate": 3.0392156862745097e-05, - "loss": 0.5922, - "step": 133080 - }, - { - "epoch": 1.1765589914956063, - "grad_norm": 1.2131184339523315, - "learning_rate": 3.0390683475073228e-05, - "loss": 0.6746, - "step": 133090 - }, - { - "epoch": 1.1766473947559186, - "grad_norm": 3.128512382507324, - "learning_rate": 3.038921008740136e-05, - "loss": 0.6037, - "step": 133100 - }, - { - "epoch": 1.1767357980162307, - "grad_norm": 2.463557481765747, - "learning_rate": 3.0387736699729485e-05, - "loss": 0.5792, - "step": 133110 - }, - { - "epoch": 1.176824201276543, - "grad_norm": 1.2875701189041138, - "learning_rate": 3.0386263312057617e-05, - "loss": 0.4718, - "step": 133120 - }, - { - "epoch": 1.1769126045368554, - "grad_norm": 0.9055109024047852, - "learning_rate": 3.038478992438575e-05, - "loss": 0.6206, - "step": 133130 - }, - { - "epoch": 1.1770010077971675, - "grad_norm": 6.1861162185668945, - "learning_rate": 3.0383316536713873e-05, - "loss": 0.6528, - "step": 133140 - }, - { - "epoch": 1.1770894110574799, - "grad_norm": 6.8475775718688965, - "learning_rate": 3.0381843149042005e-05, - "loss": 0.6514, - "step": 133150 - }, - { - "epoch": 1.177177814317792, - "grad_norm": 11.177821159362793, - "learning_rate": 3.0380369761370137e-05, - "loss": 0.6521, - "step": 133160 - }, - { - "epoch": 1.1772662175781043, - "grad_norm": 1.2997747659683228, - "learning_rate": 3.0378896373698262e-05, - "loss": 0.4423, - "step": 133170 - }, - { - "epoch": 1.1773546208384165, - "grad_norm": 2.687894105911255, - "learning_rate": 3.0377422986026394e-05, - "loss": 0.4874, - "step": 133180 - }, - { - "epoch": 1.1774430240987288, - "grad_norm": 3.052844762802124, - "learning_rate": 3.037594959835452e-05, - "loss": 0.6034, - "step": 133190 - }, - { - "epoch": 1.177531427359041, - "grad_norm": 4.097358226776123, - "learning_rate": 3.037447621068265e-05, - "loss": 0.7755, - "step": 133200 - }, - { - "epoch": 1.1776198306193533, - "grad_norm": 7.128112316131592, - "learning_rate": 3.0373002823010782e-05, - "loss": 0.6449, - "step": 133210 - }, - { - "epoch": 1.1777082338796654, - "grad_norm": 0.8843667507171631, - "learning_rate": 3.0371529435338907e-05, - "loss": 0.5929, - "step": 133220 - }, - { - "epoch": 1.1777966371399777, - "grad_norm": 1.8608607053756714, - "learning_rate": 3.037005604766704e-05, - "loss": 0.7112, - "step": 133230 - }, - { - "epoch": 1.17788504040029, - "grad_norm": 5.936575889587402, - "learning_rate": 3.036858265999517e-05, - "loss": 0.6388, - "step": 133240 - }, - { - "epoch": 1.1779734436606022, - "grad_norm": 3.8557424545288086, - "learning_rate": 3.0367109272323295e-05, - "loss": 0.6037, - "step": 133250 - }, - { - "epoch": 1.1780618469209145, - "grad_norm": 4.412086009979248, - "learning_rate": 3.0365635884651427e-05, - "loss": 0.5401, - "step": 133260 - }, - { - "epoch": 1.1781502501812267, - "grad_norm": 1.3330632448196411, - "learning_rate": 3.036416249697956e-05, - "loss": 0.5513, - "step": 133270 - }, - { - "epoch": 1.178238653441539, - "grad_norm": 3.633444309234619, - "learning_rate": 3.0362689109307684e-05, - "loss": 0.5331, - "step": 133280 - }, - { - "epoch": 1.1783270567018511, - "grad_norm": 6.16020393371582, - "learning_rate": 3.0361215721635816e-05, - "loss": 0.7169, - "step": 133290 - }, - { - "epoch": 1.1784154599621635, - "grad_norm": 3.3290483951568604, - "learning_rate": 3.035974233396394e-05, - "loss": 0.5634, - "step": 133300 - }, - { - "epoch": 1.1785038632224756, - "grad_norm": 10.795690536499023, - "learning_rate": 3.0358268946292072e-05, - "loss": 0.5967, - "step": 133310 - }, - { - "epoch": 1.178592266482788, - "grad_norm": 3.8134284019470215, - "learning_rate": 3.0356795558620204e-05, - "loss": 0.6132, - "step": 133320 - }, - { - "epoch": 1.1786806697431, - "grad_norm": 1.540705919265747, - "learning_rate": 3.035532217094833e-05, - "loss": 0.5453, - "step": 133330 - }, - { - "epoch": 1.1787690730034124, - "grad_norm": 2.7744524478912354, - "learning_rate": 3.035384878327646e-05, - "loss": 0.5002, - "step": 133340 - }, - { - "epoch": 1.1788574762637247, - "grad_norm": 6.923981189727783, - "learning_rate": 3.0352375395604593e-05, - "loss": 0.6694, - "step": 133350 - }, - { - "epoch": 1.1789458795240368, - "grad_norm": 2.4157350063323975, - "learning_rate": 3.0350902007932718e-05, - "loss": 0.6004, - "step": 133360 - }, - { - "epoch": 1.179034282784349, - "grad_norm": 6.4708781242370605, - "learning_rate": 3.034942862026085e-05, - "loss": 0.5346, - "step": 133370 - }, - { - "epoch": 1.1791226860446613, - "grad_norm": 1.2918226718902588, - "learning_rate": 3.034795523258898e-05, - "loss": 0.5839, - "step": 133380 - }, - { - "epoch": 1.1792110893049736, - "grad_norm": 2.116534948348999, - "learning_rate": 3.0346481844917106e-05, - "loss": 0.5119, - "step": 133390 - }, - { - "epoch": 1.1792994925652858, - "grad_norm": 3.1565561294555664, - "learning_rate": 3.0345008457245238e-05, - "loss": 0.482, - "step": 133400 - }, - { - "epoch": 1.179387895825598, - "grad_norm": 23.30229377746582, - "learning_rate": 3.034353506957337e-05, - "loss": 0.561, - "step": 133410 - }, - { - "epoch": 1.1794762990859102, - "grad_norm": 2.0253965854644775, - "learning_rate": 3.0342061681901494e-05, - "loss": 0.6565, - "step": 133420 - }, - { - "epoch": 1.1795647023462226, - "grad_norm": 7.668882369995117, - "learning_rate": 3.0340588294229626e-05, - "loss": 0.5438, - "step": 133430 - }, - { - "epoch": 1.1796531056065347, - "grad_norm": 3.12192440032959, - "learning_rate": 3.0339114906557755e-05, - "loss": 0.5389, - "step": 133440 - }, - { - "epoch": 1.179741508866847, - "grad_norm": 1.7643693685531616, - "learning_rate": 3.0337641518885883e-05, - "loss": 0.5749, - "step": 133450 - }, - { - "epoch": 1.1798299121271592, - "grad_norm": 4.117247104644775, - "learning_rate": 3.0336168131214015e-05, - "loss": 0.7087, - "step": 133460 - }, - { - "epoch": 1.1799183153874715, - "grad_norm": 7.315949440002441, - "learning_rate": 3.0334694743542143e-05, - "loss": 0.5827, - "step": 133470 - }, - { - "epoch": 1.1800067186477836, - "grad_norm": 2.433905601501465, - "learning_rate": 3.033322135587027e-05, - "loss": 0.6983, - "step": 133480 - }, - { - "epoch": 1.180095121908096, - "grad_norm": 12.099250793457031, - "learning_rate": 3.0331747968198403e-05, - "loss": 0.6841, - "step": 133490 - }, - { - "epoch": 1.1801835251684083, - "grad_norm": 4.206975936889648, - "learning_rate": 3.033027458052653e-05, - "loss": 0.6199, - "step": 133500 - }, - { - "epoch": 1.1802719284287204, - "grad_norm": 1.3971086740493774, - "learning_rate": 3.032880119285466e-05, - "loss": 0.6861, - "step": 133510 - }, - { - "epoch": 1.1803603316890328, - "grad_norm": 9.218562126159668, - "learning_rate": 3.032732780518279e-05, - "loss": 0.6203, - "step": 133520 - }, - { - "epoch": 1.1804487349493449, - "grad_norm": 2.294931650161743, - "learning_rate": 3.032585441751092e-05, - "loss": 0.6884, - "step": 133530 - }, - { - "epoch": 1.1805371382096572, - "grad_norm": 10.050101280212402, - "learning_rate": 3.0324381029839048e-05, - "loss": 0.6901, - "step": 133540 - }, - { - "epoch": 1.1806255414699693, - "grad_norm": 3.3422930240631104, - "learning_rate": 3.0322907642167177e-05, - "loss": 0.668, - "step": 133550 - }, - { - "epoch": 1.1807139447302817, - "grad_norm": 6.363961219787598, - "learning_rate": 3.032143425449531e-05, - "loss": 0.6081, - "step": 133560 - }, - { - "epoch": 1.1808023479905938, - "grad_norm": 2.307931661605835, - "learning_rate": 3.0319960866823437e-05, - "loss": 0.6154, - "step": 133570 - }, - { - "epoch": 1.1808907512509061, - "grad_norm": 1.586371660232544, - "learning_rate": 3.0318487479151565e-05, - "loss": 0.7045, - "step": 133580 - }, - { - "epoch": 1.1809791545112183, - "grad_norm": 2.354738712310791, - "learning_rate": 3.0317014091479697e-05, - "loss": 0.526, - "step": 133590 - }, - { - "epoch": 1.1810675577715306, - "grad_norm": 2.54079270362854, - "learning_rate": 3.0315540703807825e-05, - "loss": 0.7171, - "step": 133600 - }, - { - "epoch": 1.181155961031843, - "grad_norm": 3.5299763679504395, - "learning_rate": 3.0314067316135953e-05, - "loss": 0.6507, - "step": 133610 - }, - { - "epoch": 1.181244364292155, - "grad_norm": 1.049715518951416, - "learning_rate": 3.0312593928464085e-05, - "loss": 0.5837, - "step": 133620 - }, - { - "epoch": 1.1813327675524674, - "grad_norm": 3.565322160720825, - "learning_rate": 3.0311120540792214e-05, - "loss": 0.6637, - "step": 133630 - }, - { - "epoch": 1.1814211708127795, - "grad_norm": 1.863945722579956, - "learning_rate": 3.0309647153120342e-05, - "loss": 0.6379, - "step": 133640 - }, - { - "epoch": 1.1815095740730919, - "grad_norm": 3.682539701461792, - "learning_rate": 3.0308173765448474e-05, - "loss": 0.5311, - "step": 133650 - }, - { - "epoch": 1.181597977333404, - "grad_norm": 2.9334843158721924, - "learning_rate": 3.03067003777766e-05, - "loss": 0.6481, - "step": 133660 - }, - { - "epoch": 1.1816863805937163, - "grad_norm": 4.022030830383301, - "learning_rate": 3.030522699010473e-05, - "loss": 0.7427, - "step": 133670 - }, - { - "epoch": 1.1817747838540285, - "grad_norm": 1.6121671199798584, - "learning_rate": 3.0303753602432862e-05, - "loss": 0.6308, - "step": 133680 - }, - { - "epoch": 1.1818631871143408, - "grad_norm": 3.0046753883361816, - "learning_rate": 3.0302280214760987e-05, - "loss": 0.586, - "step": 133690 - }, - { - "epoch": 1.181951590374653, - "grad_norm": 2.20027232170105, - "learning_rate": 3.030080682708912e-05, - "loss": 0.6444, - "step": 133700 - }, - { - "epoch": 1.1820399936349653, - "grad_norm": 6.925246715545654, - "learning_rate": 3.029933343941725e-05, - "loss": 0.6971, - "step": 133710 - }, - { - "epoch": 1.1821283968952776, - "grad_norm": 3.790677785873413, - "learning_rate": 3.0297860051745376e-05, - "loss": 0.713, - "step": 133720 - }, - { - "epoch": 1.1822168001555897, - "grad_norm": 2.0312232971191406, - "learning_rate": 3.0296386664073507e-05, - "loss": 0.5332, - "step": 133730 - }, - { - "epoch": 1.182305203415902, - "grad_norm": 7.247586727142334, - "learning_rate": 3.029491327640164e-05, - "loss": 0.663, - "step": 133740 - }, - { - "epoch": 1.1823936066762142, - "grad_norm": 3.275068998336792, - "learning_rate": 3.0293439888729764e-05, - "loss": 0.6624, - "step": 133750 - }, - { - "epoch": 1.1824820099365265, - "grad_norm": 1.7012073993682861, - "learning_rate": 3.0291966501057896e-05, - "loss": 0.5602, - "step": 133760 - }, - { - "epoch": 1.1825704131968386, - "grad_norm": 2.670976161956787, - "learning_rate": 3.029049311338602e-05, - "loss": 0.5404, - "step": 133770 - }, - { - "epoch": 1.182658816457151, - "grad_norm": 0.9218131303787231, - "learning_rate": 3.0289019725714152e-05, - "loss": 0.6014, - "step": 133780 - }, - { - "epoch": 1.182747219717463, - "grad_norm": 8.487752914428711, - "learning_rate": 3.0287546338042284e-05, - "loss": 0.7067, - "step": 133790 - }, - { - "epoch": 1.1828356229777754, - "grad_norm": 1.5247690677642822, - "learning_rate": 3.028607295037041e-05, - "loss": 0.7459, - "step": 133800 - }, - { - "epoch": 1.1829240262380876, - "grad_norm": 1.91983163356781, - "learning_rate": 3.028459956269854e-05, - "loss": 0.7229, - "step": 133810 - }, - { - "epoch": 1.1830124294984, - "grad_norm": 6.579474925994873, - "learning_rate": 3.0283126175026673e-05, - "loss": 0.5639, - "step": 133820 - }, - { - "epoch": 1.1831008327587123, - "grad_norm": 1.699512243270874, - "learning_rate": 3.0281652787354798e-05, - "loss": 0.5564, - "step": 133830 - }, - { - "epoch": 1.1831892360190244, - "grad_norm": 1.832939863204956, - "learning_rate": 3.028017939968293e-05, - "loss": 0.7191, - "step": 133840 - }, - { - "epoch": 1.1832776392793367, - "grad_norm": 0.9650859236717224, - "learning_rate": 3.027870601201106e-05, - "loss": 0.7212, - "step": 133850 - }, - { - "epoch": 1.1833660425396488, - "grad_norm": 9.520347595214844, - "learning_rate": 3.0277232624339186e-05, - "loss": 0.6559, - "step": 133860 - }, - { - "epoch": 1.1834544457999612, - "grad_norm": 2.349287986755371, - "learning_rate": 3.0275759236667318e-05, - "loss": 0.6233, - "step": 133870 - }, - { - "epoch": 1.1835428490602733, - "grad_norm": 1.295008897781372, - "learning_rate": 3.027428584899545e-05, - "loss": 0.7253, - "step": 133880 - }, - { - "epoch": 1.1836312523205856, - "grad_norm": 1.1947423219680786, - "learning_rate": 3.0272812461323574e-05, - "loss": 0.6286, - "step": 133890 - }, - { - "epoch": 1.1837196555808978, - "grad_norm": 2.7769126892089844, - "learning_rate": 3.0271339073651706e-05, - "loss": 0.6528, - "step": 133900 - }, - { - "epoch": 1.18380805884121, - "grad_norm": 1.6827560663223267, - "learning_rate": 3.026986568597983e-05, - "loss": 0.5617, - "step": 133910 - }, - { - "epoch": 1.1838964621015222, - "grad_norm": 8.138789176940918, - "learning_rate": 3.0268392298307963e-05, - "loss": 0.7237, - "step": 133920 - }, - { - "epoch": 1.1839848653618346, - "grad_norm": 10.887385368347168, - "learning_rate": 3.0266918910636095e-05, - "loss": 0.5822, - "step": 133930 - }, - { - "epoch": 1.184073268622147, - "grad_norm": 8.09276008605957, - "learning_rate": 3.026544552296422e-05, - "loss": 0.4782, - "step": 133940 - }, - { - "epoch": 1.184161671882459, - "grad_norm": 2.6022586822509766, - "learning_rate": 3.026397213529235e-05, - "loss": 0.6282, - "step": 133950 - }, - { - "epoch": 1.1842500751427711, - "grad_norm": 2.1033880710601807, - "learning_rate": 3.0262498747620483e-05, - "loss": 0.6039, - "step": 133960 - }, - { - "epoch": 1.1843384784030835, - "grad_norm": 6.447736740112305, - "learning_rate": 3.0261025359948608e-05, - "loss": 0.5947, - "step": 133970 - }, - { - "epoch": 1.1844268816633958, - "grad_norm": 1.5387040376663208, - "learning_rate": 3.025955197227674e-05, - "loss": 0.6481, - "step": 133980 - }, - { - "epoch": 1.184515284923708, - "grad_norm": 2.642251968383789, - "learning_rate": 3.025807858460487e-05, - "loss": 0.6364, - "step": 133990 - }, - { - "epoch": 1.1846036881840203, - "grad_norm": 1.2769719362258911, - "learning_rate": 3.0256605196932997e-05, - "loss": 0.5002, - "step": 134000 - }, - { - "epoch": 1.1846920914443324, - "grad_norm": 1.3712046146392822, - "learning_rate": 3.0255131809261128e-05, - "loss": 0.6655, - "step": 134010 - }, - { - "epoch": 1.1847804947046447, - "grad_norm": 3.0285303592681885, - "learning_rate": 3.0253658421589253e-05, - "loss": 0.714, - "step": 134020 - }, - { - "epoch": 1.1848688979649569, - "grad_norm": 10.945008277893066, - "learning_rate": 3.0252185033917385e-05, - "loss": 0.6471, - "step": 134030 - }, - { - "epoch": 1.1849573012252692, - "grad_norm": 4.571321487426758, - "learning_rate": 3.0250711646245517e-05, - "loss": 0.6315, - "step": 134040 - }, - { - "epoch": 1.1850457044855816, - "grad_norm": 7.85854434967041, - "learning_rate": 3.0249238258573642e-05, - "loss": 0.5895, - "step": 134050 - }, - { - "epoch": 1.1851341077458937, - "grad_norm": 1.8974639177322388, - "learning_rate": 3.0247764870901773e-05, - "loss": 0.6176, - "step": 134060 - }, - { - "epoch": 1.1852225110062058, - "grad_norm": 2.0651192665100098, - "learning_rate": 3.0246291483229905e-05, - "loss": 0.5908, - "step": 134070 - }, - { - "epoch": 1.1853109142665181, - "grad_norm": 0.9960033297538757, - "learning_rate": 3.024481809555803e-05, - "loss": 0.5894, - "step": 134080 - }, - { - "epoch": 1.1853993175268305, - "grad_norm": 1.3338834047317505, - "learning_rate": 3.0243344707886162e-05, - "loss": 0.6926, - "step": 134090 - }, - { - "epoch": 1.1854877207871426, - "grad_norm": 1.4474478960037231, - "learning_rate": 3.0241871320214294e-05, - "loss": 0.69, - "step": 134100 - }, - { - "epoch": 1.185576124047455, - "grad_norm": 4.29463529586792, - "learning_rate": 3.024039793254242e-05, - "loss": 0.7127, - "step": 134110 - }, - { - "epoch": 1.185664527307767, - "grad_norm": 4.743776798248291, - "learning_rate": 3.023892454487055e-05, - "loss": 0.6648, - "step": 134120 - }, - { - "epoch": 1.1857529305680794, - "grad_norm": 1.6514040231704712, - "learning_rate": 3.0237451157198675e-05, - "loss": 0.663, - "step": 134130 - }, - { - "epoch": 1.1858413338283915, - "grad_norm": 1.0738427639007568, - "learning_rate": 3.0235977769526807e-05, - "loss": 0.5619, - "step": 134140 - }, - { - "epoch": 1.1859297370887039, - "grad_norm": 4.679333686828613, - "learning_rate": 3.023450438185494e-05, - "loss": 0.7378, - "step": 134150 - }, - { - "epoch": 1.186018140349016, - "grad_norm": 1.6422126293182373, - "learning_rate": 3.0233030994183064e-05, - "loss": 0.6213, - "step": 134160 - }, - { - "epoch": 1.1861065436093283, - "grad_norm": 5.034276485443115, - "learning_rate": 3.0231557606511196e-05, - "loss": 0.6473, - "step": 134170 - }, - { - "epoch": 1.1861949468696404, - "grad_norm": 1.9419156312942505, - "learning_rate": 3.0230084218839327e-05, - "loss": 0.6596, - "step": 134180 - }, - { - "epoch": 1.1862833501299528, - "grad_norm": 5.711865425109863, - "learning_rate": 3.0228610831167452e-05, - "loss": 0.5762, - "step": 134190 - }, - { - "epoch": 1.1863717533902651, - "grad_norm": 3.984689235687256, - "learning_rate": 3.0227137443495584e-05, - "loss": 0.5392, - "step": 134200 - }, - { - "epoch": 1.1864601566505772, - "grad_norm": 1.3124592304229736, - "learning_rate": 3.0225664055823716e-05, - "loss": 0.7484, - "step": 134210 - }, - { - "epoch": 1.1865485599108896, - "grad_norm": 2.726818561553955, - "learning_rate": 3.022419066815184e-05, - "loss": 0.6481, - "step": 134220 - }, - { - "epoch": 1.1866369631712017, - "grad_norm": 2.6501593589782715, - "learning_rate": 3.0222717280479972e-05, - "loss": 0.5935, - "step": 134230 - }, - { - "epoch": 1.186725366431514, - "grad_norm": 4.129079341888428, - "learning_rate": 3.0221243892808104e-05, - "loss": 0.6558, - "step": 134240 - }, - { - "epoch": 1.1868137696918262, - "grad_norm": 3.0883612632751465, - "learning_rate": 3.021977050513623e-05, - "loss": 0.6343, - "step": 134250 - }, - { - "epoch": 1.1869021729521385, - "grad_norm": 13.528619766235352, - "learning_rate": 3.021829711746436e-05, - "loss": 0.5872, - "step": 134260 - }, - { - "epoch": 1.1869905762124506, - "grad_norm": 1.262600064277649, - "learning_rate": 3.0216823729792486e-05, - "loss": 0.5916, - "step": 134270 - }, - { - "epoch": 1.187078979472763, - "grad_norm": 2.543734073638916, - "learning_rate": 3.0215350342120618e-05, - "loss": 0.6238, - "step": 134280 - }, - { - "epoch": 1.187167382733075, - "grad_norm": 3.074098825454712, - "learning_rate": 3.021387695444875e-05, - "loss": 0.7055, - "step": 134290 - }, - { - "epoch": 1.1872557859933874, - "grad_norm": 1.6259393692016602, - "learning_rate": 3.0212403566776874e-05, - "loss": 0.6377, - "step": 134300 - }, - { - "epoch": 1.1873441892536998, - "grad_norm": 2.3745906352996826, - "learning_rate": 3.0210930179105006e-05, - "loss": 0.5058, - "step": 134310 - }, - { - "epoch": 1.187432592514012, - "grad_norm": 3.7903308868408203, - "learning_rate": 3.0209456791433138e-05, - "loss": 0.6222, - "step": 134320 - }, - { - "epoch": 1.1875209957743242, - "grad_norm": 4.346645832061768, - "learning_rate": 3.0207983403761263e-05, - "loss": 0.596, - "step": 134330 - }, - { - "epoch": 1.1876093990346364, - "grad_norm": 3.0062313079833984, - "learning_rate": 3.0206510016089394e-05, - "loss": 0.5854, - "step": 134340 - }, - { - "epoch": 1.1876978022949487, - "grad_norm": 4.951059818267822, - "learning_rate": 3.0205036628417526e-05, - "loss": 0.5962, - "step": 134350 - }, - { - "epoch": 1.1877862055552608, - "grad_norm": 5.383270263671875, - "learning_rate": 3.020356324074565e-05, - "loss": 0.6678, - "step": 134360 - }, - { - "epoch": 1.1878746088155732, - "grad_norm": 2.7078888416290283, - "learning_rate": 3.0202089853073783e-05, - "loss": 0.7557, - "step": 134370 - }, - { - "epoch": 1.1879630120758853, - "grad_norm": 4.165093421936035, - "learning_rate": 3.020061646540191e-05, - "loss": 0.5552, - "step": 134380 - }, - { - "epoch": 1.1880514153361976, - "grad_norm": 2.7868340015411377, - "learning_rate": 3.019914307773004e-05, - "loss": 0.5464, - "step": 134390 - }, - { - "epoch": 1.1881398185965097, - "grad_norm": 1.2256553173065186, - "learning_rate": 3.019766969005817e-05, - "loss": 0.7446, - "step": 134400 - }, - { - "epoch": 1.188228221856822, - "grad_norm": 1.8391183614730835, - "learning_rate": 3.01961963023863e-05, - "loss": 0.6549, - "step": 134410 - }, - { - "epoch": 1.1883166251171344, - "grad_norm": 2.2443768978118896, - "learning_rate": 3.0194722914714428e-05, - "loss": 0.6133, - "step": 134420 - }, - { - "epoch": 1.1884050283774465, - "grad_norm": 3.4900825023651123, - "learning_rate": 3.019324952704256e-05, - "loss": 0.5998, - "step": 134430 - }, - { - "epoch": 1.188493431637759, - "grad_norm": 4.5909929275512695, - "learning_rate": 3.0191776139370688e-05, - "loss": 0.4663, - "step": 134440 - }, - { - "epoch": 1.188581834898071, - "grad_norm": 2.962786912918091, - "learning_rate": 3.0190302751698817e-05, - "loss": 0.5979, - "step": 134450 - }, - { - "epoch": 1.1886702381583834, - "grad_norm": 1.3459868431091309, - "learning_rate": 3.0188829364026948e-05, - "loss": 0.5572, - "step": 134460 - }, - { - "epoch": 1.1887586414186955, - "grad_norm": 0.845302402973175, - "learning_rate": 3.0187355976355077e-05, - "loss": 0.5532, - "step": 134470 - }, - { - "epoch": 1.1888470446790078, - "grad_norm": 2.38145112991333, - "learning_rate": 3.0185882588683205e-05, - "loss": 0.6991, - "step": 134480 - }, - { - "epoch": 1.18893544793932, - "grad_norm": 10.354094505310059, - "learning_rate": 3.0184409201011333e-05, - "loss": 0.6276, - "step": 134490 - }, - { - "epoch": 1.1890238511996323, - "grad_norm": 1.6070795059204102, - "learning_rate": 3.0182935813339465e-05, - "loss": 0.5707, - "step": 134500 - }, - { - "epoch": 1.1891122544599444, - "grad_norm": 2.7522847652435303, - "learning_rate": 3.0181462425667593e-05, - "loss": 0.7623, - "step": 134510 - }, - { - "epoch": 1.1892006577202567, - "grad_norm": 1.0323193073272705, - "learning_rate": 3.0179989037995722e-05, - "loss": 0.6218, - "step": 134520 - }, - { - "epoch": 1.189289060980569, - "grad_norm": 3.0552427768707275, - "learning_rate": 3.0178515650323854e-05, - "loss": 0.6467, - "step": 134530 - }, - { - "epoch": 1.1893774642408812, - "grad_norm": 3.1509528160095215, - "learning_rate": 3.0177042262651982e-05, - "loss": 0.6141, - "step": 134540 - }, - { - "epoch": 1.1894658675011935, - "grad_norm": 11.069650650024414, - "learning_rate": 3.017556887498011e-05, - "loss": 0.6985, - "step": 134550 - }, - { - "epoch": 1.1895542707615057, - "grad_norm": 6.334070682525635, - "learning_rate": 3.0174095487308242e-05, - "loss": 0.7615, - "step": 134560 - }, - { - "epoch": 1.189642674021818, - "grad_norm": 1.852825403213501, - "learning_rate": 3.017262209963637e-05, - "loss": 0.5602, - "step": 134570 - }, - { - "epoch": 1.1897310772821301, - "grad_norm": 2.627521276473999, - "learning_rate": 3.01711487119645e-05, - "loss": 0.5864, - "step": 134580 - }, - { - "epoch": 1.1898194805424425, - "grad_norm": 4.4445390701293945, - "learning_rate": 3.016967532429263e-05, - "loss": 0.6078, - "step": 134590 - }, - { - "epoch": 1.1899078838027546, - "grad_norm": 2.243546724319458, - "learning_rate": 3.0168201936620755e-05, - "loss": 0.593, - "step": 134600 - }, - { - "epoch": 1.189996287063067, - "grad_norm": 7.853315830230713, - "learning_rate": 3.0166728548948887e-05, - "loss": 0.6843, - "step": 134610 - }, - { - "epoch": 1.190084690323379, - "grad_norm": 1.8134901523590088, - "learning_rate": 3.016525516127702e-05, - "loss": 0.6254, - "step": 134620 - }, - { - "epoch": 1.1901730935836914, - "grad_norm": 2.3110218048095703, - "learning_rate": 3.0163781773605144e-05, - "loss": 0.6844, - "step": 134630 - }, - { - "epoch": 1.1902614968440037, - "grad_norm": 6.4041547775268555, - "learning_rate": 3.0162308385933276e-05, - "loss": 0.6786, - "step": 134640 - }, - { - "epoch": 1.1903499001043159, - "grad_norm": 4.018733978271484, - "learning_rate": 3.0160834998261407e-05, - "loss": 0.6663, - "step": 134650 - }, - { - "epoch": 1.190438303364628, - "grad_norm": 2.0734989643096924, - "learning_rate": 3.0159361610589532e-05, - "loss": 0.5676, - "step": 134660 - }, - { - "epoch": 1.1905267066249403, - "grad_norm": 5.42205810546875, - "learning_rate": 3.0157888222917664e-05, - "loss": 0.7832, - "step": 134670 - }, - { - "epoch": 1.1906151098852527, - "grad_norm": 8.497903823852539, - "learning_rate": 3.0156414835245796e-05, - "loss": 0.5889, - "step": 134680 - }, - { - "epoch": 1.1907035131455648, - "grad_norm": 1.8236331939697266, - "learning_rate": 3.015494144757392e-05, - "loss": 0.7016, - "step": 134690 - }, - { - "epoch": 1.1907919164058771, - "grad_norm": 5.72875452041626, - "learning_rate": 3.0153468059902052e-05, - "loss": 0.5426, - "step": 134700 - }, - { - "epoch": 1.1908803196661892, - "grad_norm": 1.451060175895691, - "learning_rate": 3.0151994672230184e-05, - "loss": 0.6335, - "step": 134710 - }, - { - "epoch": 1.1909687229265016, - "grad_norm": 2.3085947036743164, - "learning_rate": 3.015052128455831e-05, - "loss": 0.6423, - "step": 134720 - }, - { - "epoch": 1.1910571261868137, - "grad_norm": 1.1448899507522583, - "learning_rate": 3.014904789688644e-05, - "loss": 0.4428, - "step": 134730 - }, - { - "epoch": 1.191145529447126, - "grad_norm": 3.119044303894043, - "learning_rate": 3.0147574509214566e-05, - "loss": 0.584, - "step": 134740 - }, - { - "epoch": 1.1912339327074382, - "grad_norm": 3.132662296295166, - "learning_rate": 3.0146101121542698e-05, - "loss": 0.5607, - "step": 134750 - }, - { - "epoch": 1.1913223359677505, - "grad_norm": 4.623294830322266, - "learning_rate": 3.014462773387083e-05, - "loss": 0.6425, - "step": 134760 - }, - { - "epoch": 1.1914107392280626, - "grad_norm": 2.9660074710845947, - "learning_rate": 3.0143154346198954e-05, - "loss": 0.5568, - "step": 134770 - }, - { - "epoch": 1.191499142488375, - "grad_norm": 6.1668877601623535, - "learning_rate": 3.0141680958527086e-05, - "loss": 0.594, - "step": 134780 - }, - { - "epoch": 1.1915875457486873, - "grad_norm": 4.208791732788086, - "learning_rate": 3.0140207570855218e-05, - "loss": 0.6041, - "step": 134790 - }, - { - "epoch": 1.1916759490089994, - "grad_norm": 2.7201950550079346, - "learning_rate": 3.0138734183183343e-05, - "loss": 0.551, - "step": 134800 - }, - { - "epoch": 1.1917643522693118, - "grad_norm": 2.2502012252807617, - "learning_rate": 3.0137260795511475e-05, - "loss": 0.6462, - "step": 134810 - }, - { - "epoch": 1.1918527555296239, - "grad_norm": 2.161388635635376, - "learning_rate": 3.0135787407839606e-05, - "loss": 0.6353, - "step": 134820 - }, - { - "epoch": 1.1919411587899362, - "grad_norm": 1.8835651874542236, - "learning_rate": 3.013431402016773e-05, - "loss": 0.5842, - "step": 134830 - }, - { - "epoch": 1.1920295620502483, - "grad_norm": 1.7087674140930176, - "learning_rate": 3.0132840632495863e-05, - "loss": 0.5753, - "step": 134840 - }, - { - "epoch": 1.1921179653105607, - "grad_norm": 4.540475845336914, - "learning_rate": 3.0131367244823988e-05, - "loss": 0.6149, - "step": 134850 - }, - { - "epoch": 1.1922063685708728, - "grad_norm": 5.799891471862793, - "learning_rate": 3.012989385715212e-05, - "loss": 0.6232, - "step": 134860 - }, - { - "epoch": 1.1922947718311852, - "grad_norm": 2.382371187210083, - "learning_rate": 3.012842046948025e-05, - "loss": 0.5341, - "step": 134870 - }, - { - "epoch": 1.1923831750914973, - "grad_norm": 4.911892414093018, - "learning_rate": 3.0126947081808376e-05, - "loss": 0.6944, - "step": 134880 - }, - { - "epoch": 1.1924715783518096, - "grad_norm": 2.5033485889434814, - "learning_rate": 3.0125473694136508e-05, - "loss": 0.6661, - "step": 134890 - }, - { - "epoch": 1.192559981612122, - "grad_norm": 1.0402098894119263, - "learning_rate": 3.012400030646464e-05, - "loss": 0.5771, - "step": 134900 - }, - { - "epoch": 1.192648384872434, - "grad_norm": 1.4594581127166748, - "learning_rate": 3.0122526918792765e-05, - "loss": 0.6465, - "step": 134910 - }, - { - "epoch": 1.1927367881327464, - "grad_norm": 23.358400344848633, - "learning_rate": 3.0121053531120897e-05, - "loss": 0.6033, - "step": 134920 - }, - { - "epoch": 1.1928251913930585, - "grad_norm": 1.0218687057495117, - "learning_rate": 3.011958014344903e-05, - "loss": 0.6087, - "step": 134930 - }, - { - "epoch": 1.1929135946533709, - "grad_norm": 1.9828563928604126, - "learning_rate": 3.0118106755777153e-05, - "loss": 0.6202, - "step": 134940 - }, - { - "epoch": 1.193001997913683, - "grad_norm": 0.881187379360199, - "learning_rate": 3.0116633368105285e-05, - "loss": 0.7083, - "step": 134950 - }, - { - "epoch": 1.1930904011739953, - "grad_norm": 8.478384971618652, - "learning_rate": 3.011515998043341e-05, - "loss": 0.6248, - "step": 134960 - }, - { - "epoch": 1.1931788044343075, - "grad_norm": 2.483832836151123, - "learning_rate": 3.0113686592761542e-05, - "loss": 0.7632, - "step": 134970 - }, - { - "epoch": 1.1932672076946198, - "grad_norm": 6.684719085693359, - "learning_rate": 3.0112213205089673e-05, - "loss": 0.5586, - "step": 134980 - }, - { - "epoch": 1.193355610954932, - "grad_norm": 1.3750325441360474, - "learning_rate": 3.01107398174178e-05, - "loss": 0.6378, - "step": 134990 - }, - { - "epoch": 1.1934440142152443, - "grad_norm": 3.7526187896728516, - "learning_rate": 3.010926642974593e-05, - "loss": 0.5898, - "step": 135000 - }, - { - "epoch": 1.1935324174755566, - "grad_norm": 1.3710887432098389, - "learning_rate": 3.0107793042074062e-05, - "loss": 0.6396, - "step": 135010 - }, - { - "epoch": 1.1936208207358687, - "grad_norm": 2.045327663421631, - "learning_rate": 3.0106319654402187e-05, - "loss": 0.7543, - "step": 135020 - }, - { - "epoch": 1.193709223996181, - "grad_norm": 1.9241224527359009, - "learning_rate": 3.010484626673032e-05, - "loss": 0.6438, - "step": 135030 - }, - { - "epoch": 1.1937976272564932, - "grad_norm": 1.0467989444732666, - "learning_rate": 3.010337287905845e-05, - "loss": 0.563, - "step": 135040 - }, - { - "epoch": 1.1938860305168055, - "grad_norm": 1.5242828130722046, - "learning_rate": 3.0101899491386575e-05, - "loss": 0.6317, - "step": 135050 - }, - { - "epoch": 1.1939744337771176, - "grad_norm": 4.247598171234131, - "learning_rate": 3.0100426103714707e-05, - "loss": 0.6454, - "step": 135060 - }, - { - "epoch": 1.19406283703743, - "grad_norm": 3.4380462169647217, - "learning_rate": 3.0098952716042832e-05, - "loss": 0.5925, - "step": 135070 - }, - { - "epoch": 1.1941512402977421, - "grad_norm": 3.403031587600708, - "learning_rate": 3.0097479328370964e-05, - "loss": 0.6129, - "step": 135080 - }, - { - "epoch": 1.1942396435580545, - "grad_norm": 2.1044533252716064, - "learning_rate": 3.0096005940699096e-05, - "loss": 0.639, - "step": 135090 - }, - { - "epoch": 1.1943280468183666, - "grad_norm": 4.494055271148682, - "learning_rate": 3.009453255302722e-05, - "loss": 0.6652, - "step": 135100 - }, - { - "epoch": 1.194416450078679, - "grad_norm": 3.6114072799682617, - "learning_rate": 3.0093059165355352e-05, - "loss": 0.6758, - "step": 135110 - }, - { - "epoch": 1.1945048533389913, - "grad_norm": 2.921370506286621, - "learning_rate": 3.0091585777683484e-05, - "loss": 0.4851, - "step": 135120 - }, - { - "epoch": 1.1945932565993034, - "grad_norm": 1.47972571849823, - "learning_rate": 3.009011239001161e-05, - "loss": 0.5908, - "step": 135130 - }, - { - "epoch": 1.1946816598596157, - "grad_norm": 2.664775848388672, - "learning_rate": 3.008863900233974e-05, - "loss": 0.5439, - "step": 135140 - }, - { - "epoch": 1.1947700631199278, - "grad_norm": 2.8714442253112793, - "learning_rate": 3.0087165614667872e-05, - "loss": 0.7995, - "step": 135150 - }, - { - "epoch": 1.1948584663802402, - "grad_norm": 2.735581159591675, - "learning_rate": 3.0085692226995997e-05, - "loss": 0.54, - "step": 135160 - }, - { - "epoch": 1.1949468696405523, - "grad_norm": 1.93233060836792, - "learning_rate": 3.008421883932413e-05, - "loss": 0.7647, - "step": 135170 - }, - { - "epoch": 1.1950352729008646, - "grad_norm": 1.9108073711395264, - "learning_rate": 3.008274545165226e-05, - "loss": 0.591, - "step": 135180 - }, - { - "epoch": 1.1951236761611768, - "grad_norm": 1.6401818990707397, - "learning_rate": 3.0081272063980386e-05, - "loss": 0.5852, - "step": 135190 - }, - { - "epoch": 1.195212079421489, - "grad_norm": 5.186831474304199, - "learning_rate": 3.0079798676308518e-05, - "loss": 0.5745, - "step": 135200 - }, - { - "epoch": 1.1953004826818012, - "grad_norm": 2.3921542167663574, - "learning_rate": 3.0078325288636643e-05, - "loss": 0.5655, - "step": 135210 - }, - { - "epoch": 1.1953888859421136, - "grad_norm": 1.8918118476867676, - "learning_rate": 3.0076851900964774e-05, - "loss": 0.6225, - "step": 135220 - }, - { - "epoch": 1.195477289202426, - "grad_norm": 4.41324520111084, - "learning_rate": 3.0075378513292906e-05, - "loss": 0.5583, - "step": 135230 - }, - { - "epoch": 1.195565692462738, - "grad_norm": 2.0892438888549805, - "learning_rate": 3.007390512562103e-05, - "loss": 0.6693, - "step": 135240 - }, - { - "epoch": 1.1956540957230501, - "grad_norm": 1.8570963144302368, - "learning_rate": 3.0072431737949163e-05, - "loss": 0.6538, - "step": 135250 - }, - { - "epoch": 1.1957424989833625, - "grad_norm": 1.8314871788024902, - "learning_rate": 3.0070958350277295e-05, - "loss": 0.5374, - "step": 135260 - }, - { - "epoch": 1.1958309022436748, - "grad_norm": 7.7529802322387695, - "learning_rate": 3.006948496260542e-05, - "loss": 0.5959, - "step": 135270 - }, - { - "epoch": 1.195919305503987, - "grad_norm": 8.081664085388184, - "learning_rate": 3.006801157493355e-05, - "loss": 0.5608, - "step": 135280 - }, - { - "epoch": 1.1960077087642993, - "grad_norm": 11.194672584533691, - "learning_rate": 3.0066538187261683e-05, - "loss": 0.589, - "step": 135290 - }, - { - "epoch": 1.1960961120246114, - "grad_norm": 1.2424057722091675, - "learning_rate": 3.0065064799589808e-05, - "loss": 0.5983, - "step": 135300 - }, - { - "epoch": 1.1961845152849238, - "grad_norm": 3.588252544403076, - "learning_rate": 3.006359141191794e-05, - "loss": 0.5889, - "step": 135310 - }, - { - "epoch": 1.1962729185452359, - "grad_norm": 4.2321953773498535, - "learning_rate": 3.0062118024246068e-05, - "loss": 0.6925, - "step": 135320 - }, - { - "epoch": 1.1963613218055482, - "grad_norm": 3.803393602371216, - "learning_rate": 3.0060644636574196e-05, - "loss": 0.6396, - "step": 135330 - }, - { - "epoch": 1.1964497250658603, - "grad_norm": 2.1365442276000977, - "learning_rate": 3.0059171248902328e-05, - "loss": 0.5557, - "step": 135340 - }, - { - "epoch": 1.1965381283261727, - "grad_norm": 5.730929851531982, - "learning_rate": 3.0057697861230456e-05, - "loss": 0.5322, - "step": 135350 - }, - { - "epoch": 1.1966265315864848, - "grad_norm": 2.514432191848755, - "learning_rate": 3.0056224473558585e-05, - "loss": 0.488, - "step": 135360 - }, - { - "epoch": 1.1967149348467971, - "grad_norm": 2.984315872192383, - "learning_rate": 3.0054751085886717e-05, - "loss": 0.6205, - "step": 135370 - }, - { - "epoch": 1.1968033381071095, - "grad_norm": 0.8397032022476196, - "learning_rate": 3.0053277698214845e-05, - "loss": 0.5801, - "step": 135380 - }, - { - "epoch": 1.1968917413674216, - "grad_norm": 3.1440913677215576, - "learning_rate": 3.0051804310542973e-05, - "loss": 0.8333, - "step": 135390 - }, - { - "epoch": 1.196980144627734, - "grad_norm": 2.5497305393218994, - "learning_rate": 3.0050330922871105e-05, - "loss": 0.6406, - "step": 135400 - }, - { - "epoch": 1.197068547888046, - "grad_norm": 2.3228108882904053, - "learning_rate": 3.0048857535199233e-05, - "loss": 0.5936, - "step": 135410 - }, - { - "epoch": 1.1971569511483584, - "grad_norm": 3.424015998840332, - "learning_rate": 3.0047384147527362e-05, - "loss": 0.7542, - "step": 135420 - }, - { - "epoch": 1.1972453544086705, - "grad_norm": 4.755439281463623, - "learning_rate": 3.004591075985549e-05, - "loss": 0.6213, - "step": 135430 - }, - { - "epoch": 1.1973337576689829, - "grad_norm": 1.2206058502197266, - "learning_rate": 3.0044437372183622e-05, - "loss": 0.5459, - "step": 135440 - }, - { - "epoch": 1.197422160929295, - "grad_norm": 4.128241062164307, - "learning_rate": 3.004296398451175e-05, - "loss": 0.5197, - "step": 135450 - }, - { - "epoch": 1.1975105641896073, - "grad_norm": 1.3135501146316528, - "learning_rate": 3.004149059683988e-05, - "loss": 0.5041, - "step": 135460 - }, - { - "epoch": 1.1975989674499194, - "grad_norm": 1.1277292966842651, - "learning_rate": 3.004001720916801e-05, - "loss": 0.5991, - "step": 135470 - }, - { - "epoch": 1.1976873707102318, - "grad_norm": 5.00355339050293, - "learning_rate": 3.003854382149614e-05, - "loss": 0.6647, - "step": 135480 - }, - { - "epoch": 1.1977757739705441, - "grad_norm": 2.198127031326294, - "learning_rate": 3.0037070433824267e-05, - "loss": 0.5137, - "step": 135490 - }, - { - "epoch": 1.1978641772308563, - "grad_norm": 3.6041066646575928, - "learning_rate": 3.00355970461524e-05, - "loss": 0.5561, - "step": 135500 - }, - { - "epoch": 1.1979525804911686, - "grad_norm": 3.7992098331451416, - "learning_rate": 3.0034123658480527e-05, - "loss": 0.6922, - "step": 135510 - }, - { - "epoch": 1.1980409837514807, - "grad_norm": 1.5862653255462646, - "learning_rate": 3.0032650270808655e-05, - "loss": 0.5576, - "step": 135520 - }, - { - "epoch": 1.198129387011793, - "grad_norm": 3.370110034942627, - "learning_rate": 3.0031176883136787e-05, - "loss": 0.5795, - "step": 135530 - }, - { - "epoch": 1.1982177902721052, - "grad_norm": 7.045070648193359, - "learning_rate": 3.0029703495464912e-05, - "loss": 0.7336, - "step": 135540 - }, - { - "epoch": 1.1983061935324175, - "grad_norm": 6.976004600524902, - "learning_rate": 3.0028230107793044e-05, - "loss": 0.6323, - "step": 135550 - }, - { - "epoch": 1.1983945967927296, - "grad_norm": 15.105899810791016, - "learning_rate": 3.0026756720121176e-05, - "loss": 0.5799, - "step": 135560 - }, - { - "epoch": 1.198483000053042, - "grad_norm": 1.8034095764160156, - "learning_rate": 3.00252833324493e-05, - "loss": 0.5372, - "step": 135570 - }, - { - "epoch": 1.198571403313354, - "grad_norm": 4.731830596923828, - "learning_rate": 3.0023809944777432e-05, - "loss": 0.5295, - "step": 135580 - }, - { - "epoch": 1.1986598065736664, - "grad_norm": 3.444960832595825, - "learning_rate": 3.0022336557105564e-05, - "loss": 0.3763, - "step": 135590 - }, - { - "epoch": 1.1987482098339788, - "grad_norm": 12.027643203735352, - "learning_rate": 3.002086316943369e-05, - "loss": 0.6959, - "step": 135600 - }, - { - "epoch": 1.198836613094291, - "grad_norm": 3.482025384902954, - "learning_rate": 3.001938978176182e-05, - "loss": 0.6573, - "step": 135610 - }, - { - "epoch": 1.1989250163546032, - "grad_norm": 1.7336550951004028, - "learning_rate": 3.0017916394089953e-05, - "loss": 0.5449, - "step": 135620 - }, - { - "epoch": 1.1990134196149154, - "grad_norm": 2.5900959968566895, - "learning_rate": 3.0016443006418077e-05, - "loss": 0.6094, - "step": 135630 - }, - { - "epoch": 1.1991018228752277, - "grad_norm": 1.0362385511398315, - "learning_rate": 3.001496961874621e-05, - "loss": 0.5825, - "step": 135640 - }, - { - "epoch": 1.1991902261355398, - "grad_norm": 1.4746947288513184, - "learning_rate": 3.001349623107434e-05, - "loss": 0.559, - "step": 135650 - }, - { - "epoch": 1.1992786293958522, - "grad_norm": 3.1542747020721436, - "learning_rate": 3.0012022843402466e-05, - "loss": 0.6825, - "step": 135660 - }, - { - "epoch": 1.1993670326561643, - "grad_norm": 1.3476935625076294, - "learning_rate": 3.0010549455730598e-05, - "loss": 0.6431, - "step": 135670 - }, - { - "epoch": 1.1994554359164766, - "grad_norm": 2.6179378032684326, - "learning_rate": 3.0009076068058723e-05, - "loss": 0.6364, - "step": 135680 - }, - { - "epoch": 1.1995438391767888, - "grad_norm": 7.111267566680908, - "learning_rate": 3.0007602680386854e-05, - "loss": 0.6319, - "step": 135690 - }, - { - "epoch": 1.199632242437101, - "grad_norm": 1.1153324842453003, - "learning_rate": 3.0006129292714986e-05, - "loss": 0.6491, - "step": 135700 - }, - { - "epoch": 1.1997206456974134, - "grad_norm": 2.242643356323242, - "learning_rate": 3.000465590504311e-05, - "loss": 0.637, - "step": 135710 - }, - { - "epoch": 1.1998090489577256, - "grad_norm": 1.3586938381195068, - "learning_rate": 3.0003182517371243e-05, - "loss": 0.6326, - "step": 135720 - }, - { - "epoch": 1.199897452218038, - "grad_norm": 1.611083745956421, - "learning_rate": 3.0001709129699375e-05, - "loss": 0.6013, - "step": 135730 - }, - { - "epoch": 1.19998585547835, - "grad_norm": 1.6657063961029053, - "learning_rate": 3.00002357420275e-05, - "loss": 0.6407, - "step": 135740 - }, - { - "epoch": 1.2000742587386624, - "grad_norm": 1.2029578685760498, - "learning_rate": 2.999876235435563e-05, - "loss": 0.6533, - "step": 135750 - }, - { - "epoch": 1.2001626619989745, - "grad_norm": 1.8247076272964478, - "learning_rate": 2.9997288966683763e-05, - "loss": 0.5325, - "step": 135760 - }, - { - "epoch": 1.2002510652592868, - "grad_norm": 2.0752828121185303, - "learning_rate": 2.9995815579011888e-05, - "loss": 0.6111, - "step": 135770 - }, - { - "epoch": 1.200339468519599, - "grad_norm": 1.8151401281356812, - "learning_rate": 2.999434219134002e-05, - "loss": 0.592, - "step": 135780 - }, - { - "epoch": 1.2004278717799113, - "grad_norm": 3.682612657546997, - "learning_rate": 2.9992868803668145e-05, - "loss": 0.6635, - "step": 135790 - }, - { - "epoch": 1.2005162750402234, - "grad_norm": 4.259321212768555, - "learning_rate": 2.9991395415996276e-05, - "loss": 0.7588, - "step": 135800 - }, - { - "epoch": 1.2006046783005357, - "grad_norm": 4.106159210205078, - "learning_rate": 2.9989922028324408e-05, - "loss": 0.6797, - "step": 135810 - }, - { - "epoch": 1.200693081560848, - "grad_norm": 4.49326229095459, - "learning_rate": 2.9988448640652533e-05, - "loss": 0.5601, - "step": 135820 - }, - { - "epoch": 1.2007814848211602, - "grad_norm": 2.7030065059661865, - "learning_rate": 2.9986975252980665e-05, - "loss": 0.6338, - "step": 135830 - }, - { - "epoch": 1.2008698880814723, - "grad_norm": 2.13750958442688, - "learning_rate": 2.9985501865308797e-05, - "loss": 0.738, - "step": 135840 - }, - { - "epoch": 1.2009582913417847, - "grad_norm": 8.595383644104004, - "learning_rate": 2.998402847763692e-05, - "loss": 0.5787, - "step": 135850 - }, - { - "epoch": 1.201046694602097, - "grad_norm": 1.8548564910888672, - "learning_rate": 2.9982555089965053e-05, - "loss": 0.6345, - "step": 135860 - }, - { - "epoch": 1.2011350978624091, - "grad_norm": 1.257297158241272, - "learning_rate": 2.9981081702293185e-05, - "loss": 0.6923, - "step": 135870 - }, - { - "epoch": 1.2012235011227215, - "grad_norm": 7.5779547691345215, - "learning_rate": 2.997960831462131e-05, - "loss": 0.5577, - "step": 135880 - }, - { - "epoch": 1.2013119043830336, - "grad_norm": 2.7143030166625977, - "learning_rate": 2.9978134926949442e-05, - "loss": 0.6172, - "step": 135890 - }, - { - "epoch": 1.201400307643346, - "grad_norm": 1.1999244689941406, - "learning_rate": 2.9976661539277567e-05, - "loss": 0.6509, - "step": 135900 - }, - { - "epoch": 1.201488710903658, - "grad_norm": 1.4042283296585083, - "learning_rate": 2.99751881516057e-05, - "loss": 0.4573, - "step": 135910 - }, - { - "epoch": 1.2015771141639704, - "grad_norm": 2.753455877304077, - "learning_rate": 2.997371476393383e-05, - "loss": 0.5865, - "step": 135920 - }, - { - "epoch": 1.2016655174242825, - "grad_norm": 7.5508270263671875, - "learning_rate": 2.9972241376261955e-05, - "loss": 0.6316, - "step": 135930 - }, - { - "epoch": 1.2017539206845949, - "grad_norm": 4.950708389282227, - "learning_rate": 2.9970767988590087e-05, - "loss": 0.8057, - "step": 135940 - }, - { - "epoch": 1.201842323944907, - "grad_norm": 2.173250436782837, - "learning_rate": 2.996929460091822e-05, - "loss": 0.7843, - "step": 135950 - }, - { - "epoch": 1.2019307272052193, - "grad_norm": 2.9072792530059814, - "learning_rate": 2.9967821213246344e-05, - "loss": 0.5655, - "step": 135960 - }, - { - "epoch": 1.2020191304655317, - "grad_norm": 1.9952467679977417, - "learning_rate": 2.9966347825574475e-05, - "loss": 0.5429, - "step": 135970 - }, - { - "epoch": 1.2021075337258438, - "grad_norm": 1.2081823348999023, - "learning_rate": 2.9964874437902607e-05, - "loss": 0.6011, - "step": 135980 - }, - { - "epoch": 1.2021959369861561, - "grad_norm": 3.3362925052642822, - "learning_rate": 2.9963401050230732e-05, - "loss": 0.5857, - "step": 135990 - }, - { - "epoch": 1.2022843402464682, - "grad_norm": 2.562812328338623, - "learning_rate": 2.9961927662558864e-05, - "loss": 0.6045, - "step": 136000 - }, - { - "epoch": 1.2023727435067806, - "grad_norm": 7.28032922744751, - "learning_rate": 2.996045427488699e-05, - "loss": 0.6275, - "step": 136010 - }, - { - "epoch": 1.2024611467670927, - "grad_norm": 2.5389790534973145, - "learning_rate": 2.995898088721512e-05, - "loss": 0.7235, - "step": 136020 - }, - { - "epoch": 1.202549550027405, - "grad_norm": 4.076797008514404, - "learning_rate": 2.9957507499543252e-05, - "loss": 0.5264, - "step": 136030 - }, - { - "epoch": 1.2026379532877172, - "grad_norm": 3.2233691215515137, - "learning_rate": 2.9956034111871377e-05, - "loss": 0.6682, - "step": 136040 - }, - { - "epoch": 1.2027263565480295, - "grad_norm": 1.448582649230957, - "learning_rate": 2.995456072419951e-05, - "loss": 0.576, - "step": 136050 - }, - { - "epoch": 1.2028147598083416, - "grad_norm": 5.4315032958984375, - "learning_rate": 2.995308733652764e-05, - "loss": 0.531, - "step": 136060 - }, - { - "epoch": 1.202903163068654, - "grad_norm": 4.017689228057861, - "learning_rate": 2.9951613948855766e-05, - "loss": 0.6048, - "step": 136070 - }, - { - "epoch": 1.2029915663289663, - "grad_norm": 5.030168056488037, - "learning_rate": 2.9950140561183897e-05, - "loss": 0.5581, - "step": 136080 - }, - { - "epoch": 1.2030799695892784, - "grad_norm": 4.050236701965332, - "learning_rate": 2.994866717351203e-05, - "loss": 0.6695, - "step": 136090 - }, - { - "epoch": 1.2031683728495908, - "grad_norm": 1.908950924873352, - "learning_rate": 2.9947193785840154e-05, - "loss": 0.7135, - "step": 136100 - }, - { - "epoch": 1.203256776109903, - "grad_norm": 1.759932518005371, - "learning_rate": 2.9945720398168286e-05, - "loss": 0.6404, - "step": 136110 - }, - { - "epoch": 1.2033451793702152, - "grad_norm": 1.37447190284729, - "learning_rate": 2.9944247010496418e-05, - "loss": 0.5635, - "step": 136120 - }, - { - "epoch": 1.2034335826305274, - "grad_norm": 12.4634370803833, - "learning_rate": 2.9942773622824543e-05, - "loss": 0.6053, - "step": 136130 - }, - { - "epoch": 1.2035219858908397, - "grad_norm": 2.978100538253784, - "learning_rate": 2.9941300235152674e-05, - "loss": 0.7483, - "step": 136140 - }, - { - "epoch": 1.2036103891511518, - "grad_norm": 1.9565192461013794, - "learning_rate": 2.99398268474808e-05, - "loss": 0.5541, - "step": 136150 - }, - { - "epoch": 1.2036987924114642, - "grad_norm": 7.531537055969238, - "learning_rate": 2.993835345980893e-05, - "loss": 0.4802, - "step": 136160 - }, - { - "epoch": 1.2037871956717763, - "grad_norm": 3.354962110519409, - "learning_rate": 2.9936880072137063e-05, - "loss": 0.6952, - "step": 136170 - }, - { - "epoch": 1.2038755989320886, - "grad_norm": 6.775915145874023, - "learning_rate": 2.9935406684465188e-05, - "loss": 0.6826, - "step": 136180 - }, - { - "epoch": 1.203964002192401, - "grad_norm": 2.227693796157837, - "learning_rate": 2.993393329679332e-05, - "loss": 0.5377, - "step": 136190 - }, - { - "epoch": 1.204052405452713, - "grad_norm": 2.2532155513763428, - "learning_rate": 2.993245990912145e-05, - "loss": 0.566, - "step": 136200 - }, - { - "epoch": 1.2041408087130254, - "grad_norm": 3.50679874420166, - "learning_rate": 2.9930986521449576e-05, - "loss": 0.5649, - "step": 136210 - }, - { - "epoch": 1.2042292119733375, - "grad_norm": 11.085139274597168, - "learning_rate": 2.9929513133777708e-05, - "loss": 0.5629, - "step": 136220 - }, - { - "epoch": 1.2043176152336499, - "grad_norm": 2.4664134979248047, - "learning_rate": 2.992803974610584e-05, - "loss": 0.5645, - "step": 136230 - }, - { - "epoch": 1.204406018493962, - "grad_norm": 2.66837215423584, - "learning_rate": 2.9926566358433965e-05, - "loss": 0.5899, - "step": 136240 - }, - { - "epoch": 1.2044944217542743, - "grad_norm": 2.3268489837646484, - "learning_rate": 2.9925092970762096e-05, - "loss": 0.6027, - "step": 136250 - }, - { - "epoch": 1.2045828250145865, - "grad_norm": 4.256599426269531, - "learning_rate": 2.9923619583090225e-05, - "loss": 0.5748, - "step": 136260 - }, - { - "epoch": 1.2046712282748988, - "grad_norm": 16.19921112060547, - "learning_rate": 2.9922146195418353e-05, - "loss": 0.6162, - "step": 136270 - }, - { - "epoch": 1.204759631535211, - "grad_norm": 1.312179446220398, - "learning_rate": 2.9920672807746485e-05, - "loss": 0.5321, - "step": 136280 - }, - { - "epoch": 1.2048480347955233, - "grad_norm": 2.1660702228546143, - "learning_rate": 2.9919199420074613e-05, - "loss": 0.6213, - "step": 136290 - }, - { - "epoch": 1.2049364380558356, - "grad_norm": 1.8339970111846924, - "learning_rate": 2.991772603240274e-05, - "loss": 0.5352, - "step": 136300 - }, - { - "epoch": 1.2050248413161477, - "grad_norm": 1.6349108219146729, - "learning_rate": 2.9916252644730873e-05, - "loss": 0.6056, - "step": 136310 - }, - { - "epoch": 1.20511324457646, - "grad_norm": 4.372035980224609, - "learning_rate": 2.9914779257059e-05, - "loss": 0.545, - "step": 136320 - }, - { - "epoch": 1.2052016478367722, - "grad_norm": 2.2924644947052, - "learning_rate": 2.991330586938713e-05, - "loss": 0.6779, - "step": 136330 - }, - { - "epoch": 1.2052900510970845, - "grad_norm": 1.4742251634597778, - "learning_rate": 2.9911832481715262e-05, - "loss": 0.658, - "step": 136340 - }, - { - "epoch": 1.2053784543573967, - "grad_norm": 5.213178634643555, - "learning_rate": 2.991035909404339e-05, - "loss": 0.6454, - "step": 136350 - }, - { - "epoch": 1.205466857617709, - "grad_norm": 8.827898979187012, - "learning_rate": 2.990888570637152e-05, - "loss": 0.7178, - "step": 136360 - }, - { - "epoch": 1.2055552608780211, - "grad_norm": 1.477850317955017, - "learning_rate": 2.9907412318699647e-05, - "loss": 0.5543, - "step": 136370 - }, - { - "epoch": 1.2056436641383335, - "grad_norm": 2.4808008670806885, - "learning_rate": 2.990593893102778e-05, - "loss": 0.5529, - "step": 136380 - }, - { - "epoch": 1.2057320673986456, - "grad_norm": 11.636964797973633, - "learning_rate": 2.9904465543355907e-05, - "loss": 0.5901, - "step": 136390 - }, - { - "epoch": 1.205820470658958, - "grad_norm": 1.3622137308120728, - "learning_rate": 2.9902992155684035e-05, - "loss": 0.6249, - "step": 136400 - }, - { - "epoch": 1.2059088739192703, - "grad_norm": 2.1333587169647217, - "learning_rate": 2.9901518768012167e-05, - "loss": 0.6967, - "step": 136410 - }, - { - "epoch": 1.2059972771795824, - "grad_norm": 6.147896766662598, - "learning_rate": 2.9900045380340295e-05, - "loss": 0.6283, - "step": 136420 - }, - { - "epoch": 1.2060856804398945, - "grad_norm": 2.3316831588745117, - "learning_rate": 2.9898571992668424e-05, - "loss": 0.6949, - "step": 136430 - }, - { - "epoch": 1.2061740837002068, - "grad_norm": 1.2832236289978027, - "learning_rate": 2.9897098604996555e-05, - "loss": 0.5576, - "step": 136440 - }, - { - "epoch": 1.2062624869605192, - "grad_norm": 1.4624751806259155, - "learning_rate": 2.9895625217324684e-05, - "loss": 0.6055, - "step": 136450 - }, - { - "epoch": 1.2063508902208313, - "grad_norm": 1.3734575510025024, - "learning_rate": 2.9894151829652812e-05, - "loss": 0.4974, - "step": 136460 - }, - { - "epoch": 1.2064392934811436, - "grad_norm": 1.2721678018569946, - "learning_rate": 2.9892678441980944e-05, - "loss": 0.5278, - "step": 136470 - }, - { - "epoch": 1.2065276967414558, - "grad_norm": 0.8982300758361816, - "learning_rate": 2.989120505430907e-05, - "loss": 0.7269, - "step": 136480 - }, - { - "epoch": 1.206616100001768, - "grad_norm": 0.6635897159576416, - "learning_rate": 2.98897316666372e-05, - "loss": 0.6442, - "step": 136490 - }, - { - "epoch": 1.2067045032620802, - "grad_norm": 1.790482759475708, - "learning_rate": 2.9888258278965332e-05, - "loss": 0.5081, - "step": 136500 - }, - { - "epoch": 1.2067929065223926, - "grad_norm": 1.2175158262252808, - "learning_rate": 2.9886784891293457e-05, - "loss": 0.5805, - "step": 136510 - }, - { - "epoch": 1.2068813097827047, - "grad_norm": 0.6000199913978577, - "learning_rate": 2.988531150362159e-05, - "loss": 0.5517, - "step": 136520 - }, - { - "epoch": 1.206969713043017, - "grad_norm": 2.8189423084259033, - "learning_rate": 2.988383811594972e-05, - "loss": 0.6113, - "step": 136530 - }, - { - "epoch": 1.2070581163033292, - "grad_norm": 4.3407111167907715, - "learning_rate": 2.9882364728277846e-05, - "loss": 0.5647, - "step": 136540 - }, - { - "epoch": 1.2071465195636415, - "grad_norm": 8.9413480758667, - "learning_rate": 2.9880891340605978e-05, - "loss": 0.6584, - "step": 136550 - }, - { - "epoch": 1.2072349228239538, - "grad_norm": 10.368038177490234, - "learning_rate": 2.987941795293411e-05, - "loss": 0.6513, - "step": 136560 - }, - { - "epoch": 1.207323326084266, - "grad_norm": 1.3307462930679321, - "learning_rate": 2.9877944565262234e-05, - "loss": 0.5898, - "step": 136570 - }, - { - "epoch": 1.2074117293445783, - "grad_norm": 1.3528331518173218, - "learning_rate": 2.9876471177590366e-05, - "loss": 0.6154, - "step": 136580 - }, - { - "epoch": 1.2075001326048904, - "grad_norm": 2.554675579071045, - "learning_rate": 2.9874997789918498e-05, - "loss": 0.6681, - "step": 136590 - }, - { - "epoch": 1.2075885358652028, - "grad_norm": 2.9656012058258057, - "learning_rate": 2.9873524402246623e-05, - "loss": 0.7641, - "step": 136600 - }, - { - "epoch": 1.2076769391255149, - "grad_norm": 6.637085914611816, - "learning_rate": 2.9872051014574754e-05, - "loss": 0.6516, - "step": 136610 - }, - { - "epoch": 1.2077653423858272, - "grad_norm": 1.9870898723602295, - "learning_rate": 2.987057762690288e-05, - "loss": 0.6032, - "step": 136620 - }, - { - "epoch": 1.2078537456461393, - "grad_norm": 23.89141082763672, - "learning_rate": 2.986910423923101e-05, - "loss": 0.5617, - "step": 136630 - }, - { - "epoch": 1.2079421489064517, - "grad_norm": 5.746498107910156, - "learning_rate": 2.9867630851559143e-05, - "loss": 0.4003, - "step": 136640 - }, - { - "epoch": 1.2080305521667638, - "grad_norm": 3.930852174758911, - "learning_rate": 2.9866157463887268e-05, - "loss": 0.7007, - "step": 136650 - }, - { - "epoch": 1.2081189554270761, - "grad_norm": 6.953625202178955, - "learning_rate": 2.98646840762154e-05, - "loss": 0.678, - "step": 136660 - }, - { - "epoch": 1.2082073586873885, - "grad_norm": 4.143732070922852, - "learning_rate": 2.986321068854353e-05, - "loss": 0.6667, - "step": 136670 - }, - { - "epoch": 1.2082957619477006, - "grad_norm": 0.9013015627861023, - "learning_rate": 2.9861737300871656e-05, - "loss": 0.4879, - "step": 136680 - }, - { - "epoch": 1.208384165208013, - "grad_norm": 5.37971305847168, - "learning_rate": 2.9860263913199788e-05, - "loss": 0.6711, - "step": 136690 - }, - { - "epoch": 1.208472568468325, - "grad_norm": 3.4336211681365967, - "learning_rate": 2.985879052552792e-05, - "loss": 0.6039, - "step": 136700 - }, - { - "epoch": 1.2085609717286374, - "grad_norm": 2.0258169174194336, - "learning_rate": 2.9857317137856045e-05, - "loss": 0.5788, - "step": 136710 - }, - { - "epoch": 1.2086493749889495, - "grad_norm": 3.508629083633423, - "learning_rate": 2.9855843750184176e-05, - "loss": 0.483, - "step": 136720 - }, - { - "epoch": 1.2087377782492619, - "grad_norm": 1.486860752105713, - "learning_rate": 2.98543703625123e-05, - "loss": 0.7178, - "step": 136730 - }, - { - "epoch": 1.208826181509574, - "grad_norm": 6.615573883056641, - "learning_rate": 2.9852896974840433e-05, - "loss": 0.6733, - "step": 136740 - }, - { - "epoch": 1.2089145847698863, - "grad_norm": 2.1742522716522217, - "learning_rate": 2.9851423587168565e-05, - "loss": 0.6439, - "step": 136750 - }, - { - "epoch": 1.2090029880301985, - "grad_norm": 1.98505437374115, - "learning_rate": 2.984995019949669e-05, - "loss": 0.7232, - "step": 136760 - }, - { - "epoch": 1.2090913912905108, - "grad_norm": 2.0811924934387207, - "learning_rate": 2.984847681182482e-05, - "loss": 0.7184, - "step": 136770 - }, - { - "epoch": 1.2091797945508231, - "grad_norm": 1.3345839977264404, - "learning_rate": 2.9847003424152953e-05, - "loss": 0.643, - "step": 136780 - }, - { - "epoch": 1.2092681978111353, - "grad_norm": 12.411947250366211, - "learning_rate": 2.984553003648108e-05, - "loss": 0.5643, - "step": 136790 - }, - { - "epoch": 1.2093566010714476, - "grad_norm": 1.6442501544952393, - "learning_rate": 2.984405664880921e-05, - "loss": 0.6276, - "step": 136800 - }, - { - "epoch": 1.2094450043317597, - "grad_norm": 0.918235719203949, - "learning_rate": 2.9842583261137342e-05, - "loss": 0.5083, - "step": 136810 - }, - { - "epoch": 1.209533407592072, - "grad_norm": 3.278961181640625, - "learning_rate": 2.9841109873465467e-05, - "loss": 0.5528, - "step": 136820 - }, - { - "epoch": 1.2096218108523842, - "grad_norm": 3.771461009979248, - "learning_rate": 2.98396364857936e-05, - "loss": 0.6625, - "step": 136830 - }, - { - "epoch": 1.2097102141126965, - "grad_norm": 1.090736746788025, - "learning_rate": 2.9838163098121724e-05, - "loss": 0.5648, - "step": 136840 - }, - { - "epoch": 1.2097986173730086, - "grad_norm": 0.7083085179328918, - "learning_rate": 2.9836689710449855e-05, - "loss": 0.6447, - "step": 136850 - }, - { - "epoch": 1.209887020633321, - "grad_norm": 12.285191535949707, - "learning_rate": 2.9835216322777987e-05, - "loss": 0.8053, - "step": 136860 - }, - { - "epoch": 1.209975423893633, - "grad_norm": 2.7982661724090576, - "learning_rate": 2.9833742935106112e-05, - "loss": 0.4441, - "step": 136870 - }, - { - "epoch": 1.2100638271539454, - "grad_norm": 2.4491517543792725, - "learning_rate": 2.9832269547434244e-05, - "loss": 0.6215, - "step": 136880 - }, - { - "epoch": 1.2101522304142578, - "grad_norm": 6.341678142547607, - "learning_rate": 2.9830796159762375e-05, - "loss": 0.6386, - "step": 136890 - }, - { - "epoch": 1.21024063367457, - "grad_norm": 1.4119036197662354, - "learning_rate": 2.98293227720905e-05, - "loss": 0.4223, - "step": 136900 - }, - { - "epoch": 1.2103290369348823, - "grad_norm": 2.488222122192383, - "learning_rate": 2.9827849384418632e-05, - "loss": 0.5413, - "step": 136910 - }, - { - "epoch": 1.2104174401951944, - "grad_norm": 3.0203399658203125, - "learning_rate": 2.9826375996746764e-05, - "loss": 0.7236, - "step": 136920 - }, - { - "epoch": 1.2105058434555067, - "grad_norm": 3.1244728565216064, - "learning_rate": 2.982490260907489e-05, - "loss": 0.5899, - "step": 136930 - }, - { - "epoch": 1.2105942467158188, - "grad_norm": 2.269578456878662, - "learning_rate": 2.982342922140302e-05, - "loss": 0.5994, - "step": 136940 - }, - { - "epoch": 1.2106826499761312, - "grad_norm": 7.8710808753967285, - "learning_rate": 2.9821955833731152e-05, - "loss": 0.5414, - "step": 136950 - }, - { - "epoch": 1.2107710532364433, - "grad_norm": 2.330002784729004, - "learning_rate": 2.9820482446059277e-05, - "loss": 0.5062, - "step": 136960 - }, - { - "epoch": 1.2108594564967556, - "grad_norm": 1.1440849304199219, - "learning_rate": 2.981900905838741e-05, - "loss": 0.5044, - "step": 136970 - }, - { - "epoch": 1.2109478597570678, - "grad_norm": 2.307657480239868, - "learning_rate": 2.9817535670715534e-05, - "loss": 0.6238, - "step": 136980 - }, - { - "epoch": 1.21103626301738, - "grad_norm": 2.5043787956237793, - "learning_rate": 2.9816062283043666e-05, - "loss": 0.623, - "step": 136990 - }, - { - "epoch": 1.2111246662776924, - "grad_norm": 4.4899091720581055, - "learning_rate": 2.9814588895371798e-05, - "loss": 0.5782, - "step": 137000 - }, - { - "epoch": 1.2112130695380046, - "grad_norm": 1.0341296195983887, - "learning_rate": 2.9813115507699922e-05, - "loss": 0.5798, - "step": 137010 - }, - { - "epoch": 1.2113014727983167, - "grad_norm": 1.7302199602127075, - "learning_rate": 2.9811642120028054e-05, - "loss": 0.6064, - "step": 137020 - }, - { - "epoch": 1.211389876058629, - "grad_norm": 17.51358413696289, - "learning_rate": 2.9810168732356186e-05, - "loss": 0.7097, - "step": 137030 - }, - { - "epoch": 1.2114782793189414, - "grad_norm": 3.5841894149780273, - "learning_rate": 2.980869534468431e-05, - "loss": 0.7135, - "step": 137040 - }, - { - "epoch": 1.2115666825792535, - "grad_norm": 4.1583685874938965, - "learning_rate": 2.9807221957012443e-05, - "loss": 0.6791, - "step": 137050 - }, - { - "epoch": 1.2116550858395658, - "grad_norm": 3.6986026763916016, - "learning_rate": 2.9805748569340574e-05, - "loss": 0.6759, - "step": 137060 - }, - { - "epoch": 1.211743489099878, - "grad_norm": 3.9213991165161133, - "learning_rate": 2.98042751816687e-05, - "loss": 0.6375, - "step": 137070 - }, - { - "epoch": 1.2118318923601903, - "grad_norm": 5.8226423263549805, - "learning_rate": 2.980280179399683e-05, - "loss": 0.6665, - "step": 137080 - }, - { - "epoch": 1.2119202956205024, - "grad_norm": 3.0791375637054443, - "learning_rate": 2.9801328406324956e-05, - "loss": 0.6518, - "step": 137090 - }, - { - "epoch": 1.2120086988808147, - "grad_norm": 4.601994514465332, - "learning_rate": 2.9799855018653088e-05, - "loss": 0.6553, - "step": 137100 - }, - { - "epoch": 1.2120971021411269, - "grad_norm": 1.0395901203155518, - "learning_rate": 2.979838163098122e-05, - "loss": 0.6115, - "step": 137110 - }, - { - "epoch": 1.2121855054014392, - "grad_norm": 1.40775465965271, - "learning_rate": 2.9796908243309345e-05, - "loss": 0.6279, - "step": 137120 - }, - { - "epoch": 1.2122739086617513, - "grad_norm": 4.496260166168213, - "learning_rate": 2.9795434855637476e-05, - "loss": 0.5396, - "step": 137130 - }, - { - "epoch": 1.2123623119220637, - "grad_norm": 2.5563015937805176, - "learning_rate": 2.9793961467965608e-05, - "loss": 0.4649, - "step": 137140 - }, - { - "epoch": 1.212450715182376, - "grad_norm": 23.356992721557617, - "learning_rate": 2.9792488080293733e-05, - "loss": 0.6542, - "step": 137150 - }, - { - "epoch": 1.2125391184426881, - "grad_norm": 4.126574993133545, - "learning_rate": 2.9791014692621865e-05, - "loss": 0.4877, - "step": 137160 - }, - { - "epoch": 1.2126275217030005, - "grad_norm": 2.8210806846618652, - "learning_rate": 2.9789541304949996e-05, - "loss": 0.6324, - "step": 137170 - }, - { - "epoch": 1.2127159249633126, - "grad_norm": 3.3037822246551514, - "learning_rate": 2.978806791727812e-05, - "loss": 0.543, - "step": 137180 - }, - { - "epoch": 1.212804328223625, - "grad_norm": 2.2153584957122803, - "learning_rate": 2.9786594529606253e-05, - "loss": 0.7084, - "step": 137190 - }, - { - "epoch": 1.212892731483937, - "grad_norm": 3.0296430587768555, - "learning_rate": 2.978512114193438e-05, - "loss": 0.5181, - "step": 137200 - }, - { - "epoch": 1.2129811347442494, - "grad_norm": 4.3093438148498535, - "learning_rate": 2.978364775426251e-05, - "loss": 0.5614, - "step": 137210 - }, - { - "epoch": 1.2130695380045615, - "grad_norm": 2.935051679611206, - "learning_rate": 2.978217436659064e-05, - "loss": 0.6652, - "step": 137220 - }, - { - "epoch": 1.2131579412648739, - "grad_norm": 3.412858724594116, - "learning_rate": 2.978070097891877e-05, - "loss": 0.556, - "step": 137230 - }, - { - "epoch": 1.213246344525186, - "grad_norm": 2.090912342071533, - "learning_rate": 2.97792275912469e-05, - "loss": 0.5652, - "step": 137240 - }, - { - "epoch": 1.2133347477854983, - "grad_norm": 1.0503562688827515, - "learning_rate": 2.977775420357503e-05, - "loss": 0.6239, - "step": 137250 - }, - { - "epoch": 1.2134231510458107, - "grad_norm": 2.9367406368255615, - "learning_rate": 2.977628081590316e-05, - "loss": 0.786, - "step": 137260 - }, - { - "epoch": 1.2135115543061228, - "grad_norm": 8.727594375610352, - "learning_rate": 2.9774807428231287e-05, - "loss": 0.4766, - "step": 137270 - }, - { - "epoch": 1.2135999575664351, - "grad_norm": 2.7663381099700928, - "learning_rate": 2.977333404055942e-05, - "loss": 0.608, - "step": 137280 - }, - { - "epoch": 1.2136883608267472, - "grad_norm": 1.3357828855514526, - "learning_rate": 2.9771860652887547e-05, - "loss": 0.6031, - "step": 137290 - }, - { - "epoch": 1.2137767640870596, - "grad_norm": 2.4202351570129395, - "learning_rate": 2.9770387265215675e-05, - "loss": 0.6815, - "step": 137300 - }, - { - "epoch": 1.2138651673473717, - "grad_norm": 2.0652809143066406, - "learning_rate": 2.9768913877543804e-05, - "loss": 0.7682, - "step": 137310 - }, - { - "epoch": 1.213953570607684, - "grad_norm": 1.4614520072937012, - "learning_rate": 2.9767440489871935e-05, - "loss": 0.6199, - "step": 137320 - }, - { - "epoch": 1.2140419738679962, - "grad_norm": 2.4469339847564697, - "learning_rate": 2.9765967102200064e-05, - "loss": 0.4863, - "step": 137330 - }, - { - "epoch": 1.2141303771283085, - "grad_norm": 2.1750988960266113, - "learning_rate": 2.9764493714528192e-05, - "loss": 0.4876, - "step": 137340 - }, - { - "epoch": 1.2142187803886206, - "grad_norm": 2.6206347942352295, - "learning_rate": 2.9763020326856324e-05, - "loss": 0.5562, - "step": 137350 - }, - { - "epoch": 1.214307183648933, - "grad_norm": 2.2426376342773438, - "learning_rate": 2.9761546939184452e-05, - "loss": 0.6917, - "step": 137360 - }, - { - "epoch": 1.2143955869092453, - "grad_norm": 6.355005264282227, - "learning_rate": 2.976007355151258e-05, - "loss": 0.6412, - "step": 137370 - }, - { - "epoch": 1.2144839901695574, - "grad_norm": 1.5263639688491821, - "learning_rate": 2.9758600163840712e-05, - "loss": 0.5678, - "step": 137380 - }, - { - "epoch": 1.2145723934298698, - "grad_norm": 1.8938119411468506, - "learning_rate": 2.975712677616884e-05, - "loss": 0.6178, - "step": 137390 - }, - { - "epoch": 1.214660796690182, - "grad_norm": 0.7140837907791138, - "learning_rate": 2.975565338849697e-05, - "loss": 0.6161, - "step": 137400 - }, - { - "epoch": 1.2147491999504942, - "grad_norm": 1.3030954599380493, - "learning_rate": 2.97541800008251e-05, - "loss": 0.6784, - "step": 137410 - }, - { - "epoch": 1.2148376032108064, - "grad_norm": 1.3001399040222168, - "learning_rate": 2.975270661315323e-05, - "loss": 0.6489, - "step": 137420 - }, - { - "epoch": 1.2149260064711187, - "grad_norm": 2.305727005004883, - "learning_rate": 2.9751233225481357e-05, - "loss": 0.6255, - "step": 137430 - }, - { - "epoch": 1.2150144097314308, - "grad_norm": 3.130544662475586, - "learning_rate": 2.974975983780949e-05, - "loss": 0.6187, - "step": 137440 - }, - { - "epoch": 1.2151028129917432, - "grad_norm": 2.104393482208252, - "learning_rate": 2.9748286450137614e-05, - "loss": 0.6462, - "step": 137450 - }, - { - "epoch": 1.2151912162520553, - "grad_norm": 17.182109832763672, - "learning_rate": 2.9746813062465746e-05, - "loss": 0.5703, - "step": 137460 - }, - { - "epoch": 1.2152796195123676, - "grad_norm": 1.2784488201141357, - "learning_rate": 2.9745339674793878e-05, - "loss": 0.5694, - "step": 137470 - }, - { - "epoch": 1.21536802277268, - "grad_norm": 4.601258754730225, - "learning_rate": 2.9743866287122003e-05, - "loss": 0.635, - "step": 137480 - }, - { - "epoch": 1.215456426032992, - "grad_norm": 5.744540214538574, - "learning_rate": 2.9742392899450134e-05, - "loss": 0.5518, - "step": 137490 - }, - { - "epoch": 1.2155448292933044, - "grad_norm": 3.579768419265747, - "learning_rate": 2.9740919511778266e-05, - "loss": 0.6247, - "step": 137500 - }, - { - "epoch": 1.2156332325536165, - "grad_norm": 1.313004732131958, - "learning_rate": 2.973944612410639e-05, - "loss": 0.5962, - "step": 137510 - }, - { - "epoch": 1.215721635813929, - "grad_norm": 2.258887529373169, - "learning_rate": 2.9737972736434523e-05, - "loss": 0.5043, - "step": 137520 - }, - { - "epoch": 1.215810039074241, - "grad_norm": 0.7860084772109985, - "learning_rate": 2.9736499348762654e-05, - "loss": 0.7221, - "step": 137530 - }, - { - "epoch": 1.2158984423345534, - "grad_norm": 1.6615575551986694, - "learning_rate": 2.973502596109078e-05, - "loss": 0.5724, - "step": 137540 - }, - { - "epoch": 1.2159868455948655, - "grad_norm": 7.128425598144531, - "learning_rate": 2.973355257341891e-05, - "loss": 0.7745, - "step": 137550 - }, - { - "epoch": 1.2160752488551778, - "grad_norm": 6.046515464782715, - "learning_rate": 2.9732079185747036e-05, - "loss": 0.6238, - "step": 137560 - }, - { - "epoch": 1.21616365211549, - "grad_norm": 5.1349005699157715, - "learning_rate": 2.9730605798075168e-05, - "loss": 0.6999, - "step": 137570 - }, - { - "epoch": 1.2162520553758023, - "grad_norm": 8.523122787475586, - "learning_rate": 2.97291324104033e-05, - "loss": 0.6226, - "step": 137580 - }, - { - "epoch": 1.2163404586361146, - "grad_norm": 2.7262232303619385, - "learning_rate": 2.9727659022731425e-05, - "loss": 0.7337, - "step": 137590 - }, - { - "epoch": 1.2164288618964267, - "grad_norm": 2.348341703414917, - "learning_rate": 2.9726185635059556e-05, - "loss": 0.6576, - "step": 137600 - }, - { - "epoch": 1.2165172651567389, - "grad_norm": 4.144571304321289, - "learning_rate": 2.9724712247387688e-05, - "loss": 0.6071, - "step": 137610 - }, - { - "epoch": 1.2166056684170512, - "grad_norm": 5.32082462310791, - "learning_rate": 2.9723238859715813e-05, - "loss": 0.6986, - "step": 137620 - }, - { - "epoch": 1.2166940716773635, - "grad_norm": 1.6616417169570923, - "learning_rate": 2.9721765472043945e-05, - "loss": 0.5176, - "step": 137630 - }, - { - "epoch": 1.2167824749376757, - "grad_norm": 8.661393165588379, - "learning_rate": 2.9720292084372077e-05, - "loss": 0.6621, - "step": 137640 - }, - { - "epoch": 1.216870878197988, - "grad_norm": 12.566831588745117, - "learning_rate": 2.97188186967002e-05, - "loss": 0.5925, - "step": 137650 - }, - { - "epoch": 1.2169592814583001, - "grad_norm": 11.358185768127441, - "learning_rate": 2.9717345309028333e-05, - "loss": 0.8209, - "step": 137660 - }, - { - "epoch": 1.2170476847186125, - "grad_norm": 2.285712957382202, - "learning_rate": 2.9715871921356458e-05, - "loss": 0.5525, - "step": 137670 - }, - { - "epoch": 1.2171360879789246, - "grad_norm": 1.9643148183822632, - "learning_rate": 2.971439853368459e-05, - "loss": 0.6341, - "step": 137680 - }, - { - "epoch": 1.217224491239237, - "grad_norm": 6.272572040557861, - "learning_rate": 2.971292514601272e-05, - "loss": 0.6458, - "step": 137690 - }, - { - "epoch": 1.217312894499549, - "grad_norm": 2.1927270889282227, - "learning_rate": 2.9711451758340847e-05, - "loss": 0.6917, - "step": 137700 - }, - { - "epoch": 1.2174012977598614, - "grad_norm": 7.6609697341918945, - "learning_rate": 2.970997837066898e-05, - "loss": 0.6144, - "step": 137710 - }, - { - "epoch": 1.2174897010201735, - "grad_norm": 1.4886795282363892, - "learning_rate": 2.970850498299711e-05, - "loss": 0.6124, - "step": 137720 - }, - { - "epoch": 1.2175781042804859, - "grad_norm": 3.1784870624542236, - "learning_rate": 2.9707031595325235e-05, - "loss": 0.6208, - "step": 137730 - }, - { - "epoch": 1.2176665075407982, - "grad_norm": 4.474926948547363, - "learning_rate": 2.9705558207653367e-05, - "loss": 0.6468, - "step": 137740 - }, - { - "epoch": 1.2177549108011103, - "grad_norm": 2.798476457595825, - "learning_rate": 2.97040848199815e-05, - "loss": 0.5288, - "step": 137750 - }, - { - "epoch": 1.2178433140614227, - "grad_norm": 1.4441986083984375, - "learning_rate": 2.9702611432309624e-05, - "loss": 0.4914, - "step": 137760 - }, - { - "epoch": 1.2179317173217348, - "grad_norm": 3.248546838760376, - "learning_rate": 2.9701138044637755e-05, - "loss": 0.4981, - "step": 137770 - }, - { - "epoch": 1.2180201205820471, - "grad_norm": 3.7279415130615234, - "learning_rate": 2.969966465696588e-05, - "loss": 0.6593, - "step": 137780 - }, - { - "epoch": 1.2181085238423592, - "grad_norm": 2.674619197845459, - "learning_rate": 2.9698191269294012e-05, - "loss": 0.5844, - "step": 137790 - }, - { - "epoch": 1.2181969271026716, - "grad_norm": 8.349813461303711, - "learning_rate": 2.9696717881622144e-05, - "loss": 0.5755, - "step": 137800 - }, - { - "epoch": 1.2182853303629837, - "grad_norm": 4.912051677703857, - "learning_rate": 2.969524449395027e-05, - "loss": 0.5173, - "step": 137810 - }, - { - "epoch": 1.218373733623296, - "grad_norm": 8.534285545349121, - "learning_rate": 2.96937711062784e-05, - "loss": 0.5858, - "step": 137820 - }, - { - "epoch": 1.2184621368836082, - "grad_norm": 1.9246318340301514, - "learning_rate": 2.9692297718606532e-05, - "loss": 0.5856, - "step": 137830 - }, - { - "epoch": 1.2185505401439205, - "grad_norm": 1.6515977382659912, - "learning_rate": 2.9690824330934657e-05, - "loss": 0.6729, - "step": 137840 - }, - { - "epoch": 1.2186389434042328, - "grad_norm": 2.6866743564605713, - "learning_rate": 2.968935094326279e-05, - "loss": 0.6847, - "step": 137850 - }, - { - "epoch": 1.218727346664545, - "grad_norm": 2.5193541049957275, - "learning_rate": 2.968787755559092e-05, - "loss": 0.706, - "step": 137860 - }, - { - "epoch": 1.2188157499248573, - "grad_norm": 2.6215736865997314, - "learning_rate": 2.9686404167919046e-05, - "loss": 0.6244, - "step": 137870 - }, - { - "epoch": 1.2189041531851694, - "grad_norm": 4.919952869415283, - "learning_rate": 2.9684930780247177e-05, - "loss": 0.6091, - "step": 137880 - }, - { - "epoch": 1.2189925564454818, - "grad_norm": 1.4555758237838745, - "learning_rate": 2.968345739257531e-05, - "loss": 0.6375, - "step": 137890 - }, - { - "epoch": 1.2190809597057939, - "grad_norm": 8.432286262512207, - "learning_rate": 2.9681984004903434e-05, - "loss": 0.7329, - "step": 137900 - }, - { - "epoch": 1.2191693629661062, - "grad_norm": 3.4205806255340576, - "learning_rate": 2.9680510617231566e-05, - "loss": 0.5508, - "step": 137910 - }, - { - "epoch": 1.2192577662264183, - "grad_norm": 9.265718460083008, - "learning_rate": 2.967903722955969e-05, - "loss": 0.63, - "step": 137920 - }, - { - "epoch": 1.2193461694867307, - "grad_norm": 1.8168007135391235, - "learning_rate": 2.9677563841887823e-05, - "loss": 0.6797, - "step": 137930 - }, - { - "epoch": 1.2194345727470428, - "grad_norm": 4.4171671867370605, - "learning_rate": 2.9676090454215954e-05, - "loss": 0.5785, - "step": 137940 - }, - { - "epoch": 1.2195229760073552, - "grad_norm": 2.2304272651672363, - "learning_rate": 2.967461706654408e-05, - "loss": 0.6669, - "step": 137950 - }, - { - "epoch": 1.2196113792676675, - "grad_norm": 1.7471081018447876, - "learning_rate": 2.967314367887221e-05, - "loss": 0.6249, - "step": 137960 - }, - { - "epoch": 1.2196997825279796, - "grad_norm": 2.7947728633880615, - "learning_rate": 2.9671670291200343e-05, - "loss": 0.6053, - "step": 137970 - }, - { - "epoch": 1.219788185788292, - "grad_norm": 2.0127172470092773, - "learning_rate": 2.9670196903528468e-05, - "loss": 0.6746, - "step": 137980 - }, - { - "epoch": 1.219876589048604, - "grad_norm": 1.0718377828598022, - "learning_rate": 2.96687235158566e-05, - "loss": 0.5889, - "step": 137990 - }, - { - "epoch": 1.2199649923089164, - "grad_norm": 1.4731075763702393, - "learning_rate": 2.966725012818473e-05, - "loss": 0.6613, - "step": 138000 - }, - { - "epoch": 1.2200533955692285, - "grad_norm": 1.4960943460464478, - "learning_rate": 2.9665776740512856e-05, - "loss": 0.497, - "step": 138010 - }, - { - "epoch": 1.2201417988295409, - "grad_norm": 3.8215792179107666, - "learning_rate": 2.9664303352840988e-05, - "loss": 0.6802, - "step": 138020 - }, - { - "epoch": 1.220230202089853, - "grad_norm": 4.7353010177612305, - "learning_rate": 2.9662829965169113e-05, - "loss": 0.592, - "step": 138030 - }, - { - "epoch": 1.2203186053501653, - "grad_norm": 3.953007698059082, - "learning_rate": 2.9661356577497245e-05, - "loss": 0.5935, - "step": 138040 - }, - { - "epoch": 1.2204070086104775, - "grad_norm": 2.577096700668335, - "learning_rate": 2.9659883189825376e-05, - "loss": 0.5629, - "step": 138050 - }, - { - "epoch": 1.2204954118707898, - "grad_norm": 2.1419148445129395, - "learning_rate": 2.96584098021535e-05, - "loss": 0.75, - "step": 138060 - }, - { - "epoch": 1.2205838151311021, - "grad_norm": 3.8945703506469727, - "learning_rate": 2.9656936414481633e-05, - "loss": 0.4608, - "step": 138070 - }, - { - "epoch": 1.2206722183914143, - "grad_norm": 5.334362030029297, - "learning_rate": 2.9655463026809765e-05, - "loss": 0.5213, - "step": 138080 - }, - { - "epoch": 1.2207606216517266, - "grad_norm": 1.595914363861084, - "learning_rate": 2.965398963913789e-05, - "loss": 0.6218, - "step": 138090 - }, - { - "epoch": 1.2208490249120387, - "grad_norm": 2.302316904067993, - "learning_rate": 2.965251625146602e-05, - "loss": 0.6194, - "step": 138100 - }, - { - "epoch": 1.220937428172351, - "grad_norm": 4.442800521850586, - "learning_rate": 2.9651042863794153e-05, - "loss": 0.5927, - "step": 138110 - }, - { - "epoch": 1.2210258314326632, - "grad_norm": 14.791041374206543, - "learning_rate": 2.9649569476122278e-05, - "loss": 0.6413, - "step": 138120 - }, - { - "epoch": 1.2211142346929755, - "grad_norm": 1.07089102268219, - "learning_rate": 2.964809608845041e-05, - "loss": 0.6038, - "step": 138130 - }, - { - "epoch": 1.2212026379532877, - "grad_norm": 2.700444221496582, - "learning_rate": 2.9646622700778538e-05, - "loss": 0.6453, - "step": 138140 - }, - { - "epoch": 1.2212910412136, - "grad_norm": 2.884981870651245, - "learning_rate": 2.9645149313106667e-05, - "loss": 0.7445, - "step": 138150 - }, - { - "epoch": 1.2213794444739121, - "grad_norm": 10.680171012878418, - "learning_rate": 2.96436759254348e-05, - "loss": 0.5982, - "step": 138160 - }, - { - "epoch": 1.2214678477342245, - "grad_norm": 4.481019973754883, - "learning_rate": 2.9642202537762927e-05, - "loss": 0.6119, - "step": 138170 - }, - { - "epoch": 1.2215562509945368, - "grad_norm": 4.571042537689209, - "learning_rate": 2.9640729150091055e-05, - "loss": 0.5926, - "step": 138180 - }, - { - "epoch": 1.221644654254849, - "grad_norm": 1.2971141338348389, - "learning_rate": 2.9639255762419187e-05, - "loss": 0.6591, - "step": 138190 - }, - { - "epoch": 1.221733057515161, - "grad_norm": 9.165392875671387, - "learning_rate": 2.9637782374747315e-05, - "loss": 0.5971, - "step": 138200 - }, - { - "epoch": 1.2218214607754734, - "grad_norm": 1.4384552240371704, - "learning_rate": 2.9636308987075444e-05, - "loss": 0.5661, - "step": 138210 - }, - { - "epoch": 1.2219098640357857, - "grad_norm": 4.815966606140137, - "learning_rate": 2.9634835599403575e-05, - "loss": 0.6542, - "step": 138220 - }, - { - "epoch": 1.2219982672960978, - "grad_norm": 2.283719539642334, - "learning_rate": 2.9633362211731704e-05, - "loss": 0.5627, - "step": 138230 - }, - { - "epoch": 1.2220866705564102, - "grad_norm": 3.065930128097534, - "learning_rate": 2.9631888824059832e-05, - "loss": 0.7348, - "step": 138240 - }, - { - "epoch": 1.2221750738167223, - "grad_norm": 0.9909847378730774, - "learning_rate": 2.963041543638796e-05, - "loss": 0.6386, - "step": 138250 - }, - { - "epoch": 1.2222634770770346, - "grad_norm": 3.1771318912506104, - "learning_rate": 2.9628942048716092e-05, - "loss": 0.6293, - "step": 138260 - }, - { - "epoch": 1.2223518803373468, - "grad_norm": 1.604655146598816, - "learning_rate": 2.962746866104422e-05, - "loss": 0.5759, - "step": 138270 - }, - { - "epoch": 1.222440283597659, - "grad_norm": 13.580036163330078, - "learning_rate": 2.962599527337235e-05, - "loss": 0.4535, - "step": 138280 - }, - { - "epoch": 1.2225286868579712, - "grad_norm": 3.7436904907226562, - "learning_rate": 2.962452188570048e-05, - "loss": 0.5533, - "step": 138290 - }, - { - "epoch": 1.2226170901182836, - "grad_norm": 5.706722259521484, - "learning_rate": 2.962304849802861e-05, - "loss": 0.5865, - "step": 138300 - }, - { - "epoch": 1.2227054933785957, - "grad_norm": 1.2666332721710205, - "learning_rate": 2.9621575110356737e-05, - "loss": 0.6452, - "step": 138310 - }, - { - "epoch": 1.222793896638908, - "grad_norm": 4.669419288635254, - "learning_rate": 2.962010172268487e-05, - "loss": 0.6412, - "step": 138320 - }, - { - "epoch": 1.2228822998992204, - "grad_norm": 3.056744337081909, - "learning_rate": 2.9618628335012997e-05, - "loss": 0.5803, - "step": 138330 - }, - { - "epoch": 1.2229707031595325, - "grad_norm": 10.539825439453125, - "learning_rate": 2.9617154947341126e-05, - "loss": 0.6298, - "step": 138340 - }, - { - "epoch": 1.2230591064198448, - "grad_norm": 22.080547332763672, - "learning_rate": 2.9615681559669257e-05, - "loss": 0.5054, - "step": 138350 - }, - { - "epoch": 1.223147509680157, - "grad_norm": 1.4531960487365723, - "learning_rate": 2.9614208171997386e-05, - "loss": 0.5574, - "step": 138360 - }, - { - "epoch": 1.2232359129404693, - "grad_norm": 2.239962339401245, - "learning_rate": 2.9612734784325514e-05, - "loss": 0.5634, - "step": 138370 - }, - { - "epoch": 1.2233243162007814, - "grad_norm": 2.4439761638641357, - "learning_rate": 2.9611261396653646e-05, - "loss": 0.5967, - "step": 138380 - }, - { - "epoch": 1.2234127194610938, - "grad_norm": 10.456849098205566, - "learning_rate": 2.960978800898177e-05, - "loss": 0.8345, - "step": 138390 - }, - { - "epoch": 1.2235011227214059, - "grad_norm": 1.9543341398239136, - "learning_rate": 2.9608314621309903e-05, - "loss": 0.5144, - "step": 138400 - }, - { - "epoch": 1.2235895259817182, - "grad_norm": 1.7761907577514648, - "learning_rate": 2.9606841233638034e-05, - "loss": 0.5498, - "step": 138410 - }, - { - "epoch": 1.2236779292420303, - "grad_norm": 4.272205829620361, - "learning_rate": 2.960536784596616e-05, - "loss": 0.6445, - "step": 138420 - }, - { - "epoch": 1.2237663325023427, - "grad_norm": 4.287578582763672, - "learning_rate": 2.960389445829429e-05, - "loss": 0.7997, - "step": 138430 - }, - { - "epoch": 1.223854735762655, - "grad_norm": 2.6554386615753174, - "learning_rate": 2.9602421070622423e-05, - "loss": 0.5588, - "step": 138440 - }, - { - "epoch": 1.2239431390229671, - "grad_norm": 2.591968536376953, - "learning_rate": 2.9600947682950548e-05, - "loss": 0.6851, - "step": 138450 - }, - { - "epoch": 1.2240315422832795, - "grad_norm": 13.519428253173828, - "learning_rate": 2.959947429527868e-05, - "loss": 0.6773, - "step": 138460 - }, - { - "epoch": 1.2241199455435916, - "grad_norm": 5.097795486450195, - "learning_rate": 2.959800090760681e-05, - "loss": 0.573, - "step": 138470 - }, - { - "epoch": 1.224208348803904, - "grad_norm": 6.76710844039917, - "learning_rate": 2.9596527519934936e-05, - "loss": 0.6682, - "step": 138480 - }, - { - "epoch": 1.224296752064216, - "grad_norm": 1.6406009197235107, - "learning_rate": 2.9595054132263068e-05, - "loss": 0.5799, - "step": 138490 - }, - { - "epoch": 1.2243851553245284, - "grad_norm": 1.3960139751434326, - "learning_rate": 2.9593580744591193e-05, - "loss": 0.5657, - "step": 138500 - }, - { - "epoch": 1.2244735585848405, - "grad_norm": 2.7008554935455322, - "learning_rate": 2.9592107356919325e-05, - "loss": 0.7137, - "step": 138510 - }, - { - "epoch": 1.2245619618451529, - "grad_norm": 6.212095737457275, - "learning_rate": 2.9590633969247456e-05, - "loss": 0.7117, - "step": 138520 - }, - { - "epoch": 1.224650365105465, - "grad_norm": 1.4511773586273193, - "learning_rate": 2.958916058157558e-05, - "loss": 0.584, - "step": 138530 - }, - { - "epoch": 1.2247387683657773, - "grad_norm": 8.34814739227295, - "learning_rate": 2.9587687193903713e-05, - "loss": 0.6637, - "step": 138540 - }, - { - "epoch": 1.2248271716260897, - "grad_norm": 0.6713892817497253, - "learning_rate": 2.9586213806231845e-05, - "loss": 0.5589, - "step": 138550 - }, - { - "epoch": 1.2249155748864018, - "grad_norm": 14.103983879089355, - "learning_rate": 2.958474041855997e-05, - "loss": 0.5878, - "step": 138560 - }, - { - "epoch": 1.2250039781467141, - "grad_norm": 3.680828809738159, - "learning_rate": 2.95832670308881e-05, - "loss": 0.6141, - "step": 138570 - }, - { - "epoch": 1.2250923814070263, - "grad_norm": 10.030557632446289, - "learning_rate": 2.9581793643216233e-05, - "loss": 0.6395, - "step": 138580 - }, - { - "epoch": 1.2251807846673386, - "grad_norm": 2.402918815612793, - "learning_rate": 2.9580320255544358e-05, - "loss": 0.6361, - "step": 138590 - }, - { - "epoch": 1.2252691879276507, - "grad_norm": 3.20762038230896, - "learning_rate": 2.957884686787249e-05, - "loss": 0.6201, - "step": 138600 - }, - { - "epoch": 1.225357591187963, - "grad_norm": 5.7481584548950195, - "learning_rate": 2.9577373480200615e-05, - "loss": 0.559, - "step": 138610 - }, - { - "epoch": 1.2254459944482752, - "grad_norm": 2.062344789505005, - "learning_rate": 2.9575900092528747e-05, - "loss": 0.6437, - "step": 138620 - }, - { - "epoch": 1.2255343977085875, - "grad_norm": 6.626830577850342, - "learning_rate": 2.957442670485688e-05, - "loss": 0.6104, - "step": 138630 - }, - { - "epoch": 1.2256228009688996, - "grad_norm": 2.9041008949279785, - "learning_rate": 2.9572953317185003e-05, - "loss": 0.6814, - "step": 138640 - }, - { - "epoch": 1.225711204229212, - "grad_norm": 2.7060437202453613, - "learning_rate": 2.9571479929513135e-05, - "loss": 0.7021, - "step": 138650 - }, - { - "epoch": 1.2257996074895243, - "grad_norm": 1.874562382698059, - "learning_rate": 2.9570006541841267e-05, - "loss": 0.4089, - "step": 138660 - }, - { - "epoch": 1.2258880107498364, - "grad_norm": 4.5058698654174805, - "learning_rate": 2.9568533154169392e-05, - "loss": 0.7192, - "step": 138670 - }, - { - "epoch": 1.2259764140101488, - "grad_norm": 2.774868965148926, - "learning_rate": 2.9567059766497524e-05, - "loss": 0.6546, - "step": 138680 - }, - { - "epoch": 1.226064817270461, - "grad_norm": 2.191645622253418, - "learning_rate": 2.9565586378825655e-05, - "loss": 0.6557, - "step": 138690 - }, - { - "epoch": 1.2261532205307732, - "grad_norm": 9.972789764404297, - "learning_rate": 2.956411299115378e-05, - "loss": 0.5647, - "step": 138700 - }, - { - "epoch": 1.2262416237910854, - "grad_norm": 5.8902692794799805, - "learning_rate": 2.9562639603481912e-05, - "loss": 0.7517, - "step": 138710 - }, - { - "epoch": 1.2263300270513977, - "grad_norm": 2.5864126682281494, - "learning_rate": 2.9561166215810037e-05, - "loss": 0.6825, - "step": 138720 - }, - { - "epoch": 1.2264184303117098, - "grad_norm": 7.310678005218506, - "learning_rate": 2.955969282813817e-05, - "loss": 0.5666, - "step": 138730 - }, - { - "epoch": 1.2265068335720222, - "grad_norm": 3.093445301055908, - "learning_rate": 2.95582194404663e-05, - "loss": 0.6412, - "step": 138740 - }, - { - "epoch": 1.2265952368323343, - "grad_norm": 1.314583420753479, - "learning_rate": 2.9556746052794425e-05, - "loss": 0.7598, - "step": 138750 - }, - { - "epoch": 1.2266836400926466, - "grad_norm": 2.556443691253662, - "learning_rate": 2.9555272665122557e-05, - "loss": 0.6774, - "step": 138760 - }, - { - "epoch": 1.226772043352959, - "grad_norm": 2.419553518295288, - "learning_rate": 2.955379927745069e-05, - "loss": 0.6102, - "step": 138770 - }, - { - "epoch": 1.226860446613271, - "grad_norm": 3.532956838607788, - "learning_rate": 2.9552325889778814e-05, - "loss": 0.5526, - "step": 138780 - }, - { - "epoch": 1.2269488498735832, - "grad_norm": 1.0614246129989624, - "learning_rate": 2.9550852502106946e-05, - "loss": 0.5958, - "step": 138790 - }, - { - "epoch": 1.2270372531338956, - "grad_norm": 1.4571219682693481, - "learning_rate": 2.9549379114435077e-05, - "loss": 0.6172, - "step": 138800 - }, - { - "epoch": 1.227125656394208, - "grad_norm": 3.0234410762786865, - "learning_rate": 2.9547905726763202e-05, - "loss": 0.7691, - "step": 138810 - }, - { - "epoch": 1.22721405965452, - "grad_norm": 1.7629417181015015, - "learning_rate": 2.9546432339091334e-05, - "loss": 0.5493, - "step": 138820 - }, - { - "epoch": 1.2273024629148324, - "grad_norm": 4.805184364318848, - "learning_rate": 2.9544958951419466e-05, - "loss": 0.5679, - "step": 138830 - }, - { - "epoch": 1.2273908661751445, - "grad_norm": 5.6291728019714355, - "learning_rate": 2.954348556374759e-05, - "loss": 0.6767, - "step": 138840 - }, - { - "epoch": 1.2274792694354568, - "grad_norm": 3.7806589603424072, - "learning_rate": 2.9542012176075723e-05, - "loss": 0.6058, - "step": 138850 - }, - { - "epoch": 1.227567672695769, - "grad_norm": 2.1739916801452637, - "learning_rate": 2.9540538788403848e-05, - "loss": 0.6627, - "step": 138860 - }, - { - "epoch": 1.2276560759560813, - "grad_norm": 1.126368522644043, - "learning_rate": 2.953906540073198e-05, - "loss": 0.5701, - "step": 138870 - }, - { - "epoch": 1.2277444792163936, - "grad_norm": 3.0415472984313965, - "learning_rate": 2.953759201306011e-05, - "loss": 0.6517, - "step": 138880 - }, - { - "epoch": 1.2278328824767057, - "grad_norm": 5.938354969024658, - "learning_rate": 2.9536118625388236e-05, - "loss": 0.6688, - "step": 138890 - }, - { - "epoch": 1.2279212857370179, - "grad_norm": 6.8731207847595215, - "learning_rate": 2.9534645237716368e-05, - "loss": 0.793, - "step": 138900 - }, - { - "epoch": 1.2280096889973302, - "grad_norm": 3.185910701751709, - "learning_rate": 2.95331718500445e-05, - "loss": 0.5834, - "step": 138910 - }, - { - "epoch": 1.2280980922576425, - "grad_norm": 10.668088912963867, - "learning_rate": 2.9531698462372624e-05, - "loss": 0.5753, - "step": 138920 - }, - { - "epoch": 1.2281864955179547, - "grad_norm": 1.5388685464859009, - "learning_rate": 2.9530225074700756e-05, - "loss": 0.517, - "step": 138930 - }, - { - "epoch": 1.228274898778267, - "grad_norm": 1.0322678089141846, - "learning_rate": 2.9528751687028888e-05, - "loss": 0.5561, - "step": 138940 - }, - { - "epoch": 1.2283633020385791, - "grad_norm": 1.669554591178894, - "learning_rate": 2.9527278299357013e-05, - "loss": 0.6144, - "step": 138950 - }, - { - "epoch": 1.2284517052988915, - "grad_norm": 2.211992025375366, - "learning_rate": 2.9525804911685145e-05, - "loss": 0.648, - "step": 138960 - }, - { - "epoch": 1.2285401085592036, - "grad_norm": 5.565649509429932, - "learning_rate": 2.952433152401327e-05, - "loss": 0.7602, - "step": 138970 - }, - { - "epoch": 1.228628511819516, - "grad_norm": 3.330230712890625, - "learning_rate": 2.95228581363414e-05, - "loss": 0.7364, - "step": 138980 - }, - { - "epoch": 1.228716915079828, - "grad_norm": 1.3671505451202393, - "learning_rate": 2.9521384748669533e-05, - "loss": 0.6803, - "step": 138990 - }, - { - "epoch": 1.2288053183401404, - "grad_norm": 3.701233148574829, - "learning_rate": 2.9519911360997658e-05, - "loss": 0.6709, - "step": 139000 - }, - { - "epoch": 1.2288937216004525, - "grad_norm": 4.393486499786377, - "learning_rate": 2.951843797332579e-05, - "loss": 0.6979, - "step": 139010 - }, - { - "epoch": 1.2289821248607649, - "grad_norm": 3.8513600826263428, - "learning_rate": 2.951696458565392e-05, - "loss": 0.4942, - "step": 139020 - }, - { - "epoch": 1.2290705281210772, - "grad_norm": 2.520718574523926, - "learning_rate": 2.9515491197982046e-05, - "loss": 0.613, - "step": 139030 - }, - { - "epoch": 1.2291589313813893, - "grad_norm": 5.0588202476501465, - "learning_rate": 2.9514017810310178e-05, - "loss": 0.6457, - "step": 139040 - }, - { - "epoch": 1.2292473346417017, - "grad_norm": 3.025023937225342, - "learning_rate": 2.951254442263831e-05, - "loss": 0.6269, - "step": 139050 - }, - { - "epoch": 1.2293357379020138, - "grad_norm": 2.1494200229644775, - "learning_rate": 2.9511071034966435e-05, - "loss": 0.7161, - "step": 139060 - }, - { - "epoch": 1.2294241411623261, - "grad_norm": 1.9760066270828247, - "learning_rate": 2.9509597647294567e-05, - "loss": 0.6012, - "step": 139070 - }, - { - "epoch": 1.2295125444226382, - "grad_norm": 11.698833465576172, - "learning_rate": 2.9508124259622695e-05, - "loss": 0.617, - "step": 139080 - }, - { - "epoch": 1.2296009476829506, - "grad_norm": 1.958322286605835, - "learning_rate": 2.9506650871950823e-05, - "loss": 0.5638, - "step": 139090 - }, - { - "epoch": 1.2296893509432627, - "grad_norm": 1.2511893510818481, - "learning_rate": 2.9505177484278955e-05, - "loss": 0.5512, - "step": 139100 - }, - { - "epoch": 1.229777754203575, - "grad_norm": 2.959009885787964, - "learning_rate": 2.9503704096607083e-05, - "loss": 0.5491, - "step": 139110 - }, - { - "epoch": 1.2298661574638872, - "grad_norm": 1.961250901222229, - "learning_rate": 2.9502230708935212e-05, - "loss": 0.5892, - "step": 139120 - }, - { - "epoch": 1.2299545607241995, - "grad_norm": 1.7659155130386353, - "learning_rate": 2.9500757321263344e-05, - "loss": 0.6596, - "step": 139130 - }, - { - "epoch": 1.2300429639845119, - "grad_norm": 1.3340916633605957, - "learning_rate": 2.9499283933591472e-05, - "loss": 0.6452, - "step": 139140 - }, - { - "epoch": 1.230131367244824, - "grad_norm": 5.906961917877197, - "learning_rate": 2.94978105459196e-05, - "loss": 0.6852, - "step": 139150 - }, - { - "epoch": 1.2302197705051363, - "grad_norm": 5.5011701583862305, - "learning_rate": 2.9496337158247732e-05, - "loss": 0.5948, - "step": 139160 - }, - { - "epoch": 1.2303081737654484, - "grad_norm": 2.077199697494507, - "learning_rate": 2.949486377057586e-05, - "loss": 0.6935, - "step": 139170 - }, - { - "epoch": 1.2303965770257608, - "grad_norm": 10.900552749633789, - "learning_rate": 2.949339038290399e-05, - "loss": 0.6032, - "step": 139180 - }, - { - "epoch": 1.230484980286073, - "grad_norm": 1.7784994840621948, - "learning_rate": 2.9491916995232117e-05, - "loss": 0.6936, - "step": 139190 - }, - { - "epoch": 1.2305733835463852, - "grad_norm": 1.6221376657485962, - "learning_rate": 2.949044360756025e-05, - "loss": 0.6826, - "step": 139200 - }, - { - "epoch": 1.2306617868066974, - "grad_norm": 3.658998727798462, - "learning_rate": 2.9488970219888377e-05, - "loss": 0.7465, - "step": 139210 - }, - { - "epoch": 1.2307501900670097, - "grad_norm": 5.2513580322265625, - "learning_rate": 2.9487496832216506e-05, - "loss": 0.6939, - "step": 139220 - }, - { - "epoch": 1.2308385933273218, - "grad_norm": 2.5928003787994385, - "learning_rate": 2.9486023444544637e-05, - "loss": 0.6589, - "step": 139230 - }, - { - "epoch": 1.2309269965876342, - "grad_norm": 1.4034242630004883, - "learning_rate": 2.9484550056872766e-05, - "loss": 0.5372, - "step": 139240 - }, - { - "epoch": 1.2310153998479465, - "grad_norm": 5.431221008300781, - "learning_rate": 2.9483076669200894e-05, - "loss": 0.5149, - "step": 139250 - }, - { - "epoch": 1.2311038031082586, - "grad_norm": 2.4931249618530273, - "learning_rate": 2.9481603281529026e-05, - "loss": 0.525, - "step": 139260 - }, - { - "epoch": 1.231192206368571, - "grad_norm": 6.9441046714782715, - "learning_rate": 2.9480129893857154e-05, - "loss": 0.6273, - "step": 139270 - }, - { - "epoch": 1.231280609628883, - "grad_norm": 2.858452558517456, - "learning_rate": 2.9478656506185282e-05, - "loss": 0.5048, - "step": 139280 - }, - { - "epoch": 1.2313690128891954, - "grad_norm": 4.454078197479248, - "learning_rate": 2.9477183118513414e-05, - "loss": 0.4997, - "step": 139290 - }, - { - "epoch": 1.2314574161495075, - "grad_norm": 3.5746941566467285, - "learning_rate": 2.9475709730841543e-05, - "loss": 0.5961, - "step": 139300 - }, - { - "epoch": 1.2315458194098199, - "grad_norm": 2.514056444168091, - "learning_rate": 2.947423634316967e-05, - "loss": 0.5532, - "step": 139310 - }, - { - "epoch": 1.231634222670132, - "grad_norm": 1.0345075130462646, - "learning_rate": 2.9472762955497803e-05, - "loss": 0.4782, - "step": 139320 - }, - { - "epoch": 1.2317226259304443, - "grad_norm": 3.135854959487915, - "learning_rate": 2.9471289567825928e-05, - "loss": 0.5563, - "step": 139330 - }, - { - "epoch": 1.2318110291907565, - "grad_norm": 1.2373956441879272, - "learning_rate": 2.946981618015406e-05, - "loss": 0.5486, - "step": 139340 - }, - { - "epoch": 1.2318994324510688, - "grad_norm": 0.9658572673797607, - "learning_rate": 2.946834279248219e-05, - "loss": 0.6118, - "step": 139350 - }, - { - "epoch": 1.2319878357113812, - "grad_norm": 1.2828421592712402, - "learning_rate": 2.9466869404810316e-05, - "loss": 0.6404, - "step": 139360 - }, - { - "epoch": 1.2320762389716933, - "grad_norm": 4.5428266525268555, - "learning_rate": 2.9465396017138448e-05, - "loss": 0.5508, - "step": 139370 - }, - { - "epoch": 1.2321646422320054, - "grad_norm": 15.900464057922363, - "learning_rate": 2.946392262946658e-05, - "loss": 0.526, - "step": 139380 - }, - { - "epoch": 1.2322530454923177, - "grad_norm": 1.7019041776657104, - "learning_rate": 2.9462449241794704e-05, - "loss": 0.5456, - "step": 139390 - }, - { - "epoch": 1.23234144875263, - "grad_norm": 7.868950843811035, - "learning_rate": 2.9460975854122836e-05, - "loss": 0.5342, - "step": 139400 - }, - { - "epoch": 1.2324298520129422, - "grad_norm": 1.0215487480163574, - "learning_rate": 2.9459502466450968e-05, - "loss": 0.43, - "step": 139410 - }, - { - "epoch": 1.2325182552732545, - "grad_norm": 1.0092544555664062, - "learning_rate": 2.9458029078779093e-05, - "loss": 0.6623, - "step": 139420 - }, - { - "epoch": 1.2326066585335667, - "grad_norm": 4.069736957550049, - "learning_rate": 2.9456555691107225e-05, - "loss": 0.6316, - "step": 139430 - }, - { - "epoch": 1.232695061793879, - "grad_norm": 1.8851509094238281, - "learning_rate": 2.945508230343535e-05, - "loss": 0.4998, - "step": 139440 - }, - { - "epoch": 1.2327834650541911, - "grad_norm": 1.1257460117340088, - "learning_rate": 2.945360891576348e-05, - "loss": 0.5904, - "step": 139450 - }, - { - "epoch": 1.2328718683145035, - "grad_norm": 6.347303867340088, - "learning_rate": 2.9452135528091613e-05, - "loss": 0.7657, - "step": 139460 - }, - { - "epoch": 1.2329602715748158, - "grad_norm": 4.237667083740234, - "learning_rate": 2.9450662140419738e-05, - "loss": 0.7489, - "step": 139470 - }, - { - "epoch": 1.233048674835128, - "grad_norm": 1.6039080619812012, - "learning_rate": 2.944918875274787e-05, - "loss": 0.5705, - "step": 139480 - }, - { - "epoch": 1.23313707809544, - "grad_norm": 2.965522289276123, - "learning_rate": 2.9447715365076e-05, - "loss": 0.6045, - "step": 139490 - }, - { - "epoch": 1.2332254813557524, - "grad_norm": 1.2478375434875488, - "learning_rate": 2.9446241977404127e-05, - "loss": 0.5403, - "step": 139500 - }, - { - "epoch": 1.2333138846160647, - "grad_norm": 1.5774801969528198, - "learning_rate": 2.9444768589732258e-05, - "loss": 0.6005, - "step": 139510 - }, - { - "epoch": 1.2334022878763768, - "grad_norm": 4.610653877258301, - "learning_rate": 2.944329520206039e-05, - "loss": 0.6143, - "step": 139520 - }, - { - "epoch": 1.2334906911366892, - "grad_norm": 10.987772941589355, - "learning_rate": 2.9441821814388515e-05, - "loss": 0.8183, - "step": 139530 - }, - { - "epoch": 1.2335790943970013, - "grad_norm": 6.430619716644287, - "learning_rate": 2.9440348426716647e-05, - "loss": 0.7105, - "step": 139540 - }, - { - "epoch": 1.2336674976573136, - "grad_norm": 1.183045506477356, - "learning_rate": 2.9438875039044772e-05, - "loss": 0.5627, - "step": 139550 - }, - { - "epoch": 1.2337559009176258, - "grad_norm": 1.4134809970855713, - "learning_rate": 2.9437401651372903e-05, - "loss": 0.6304, - "step": 139560 - }, - { - "epoch": 1.2338443041779381, - "grad_norm": 4.921472549438477, - "learning_rate": 2.9435928263701035e-05, - "loss": 0.6883, - "step": 139570 - }, - { - "epoch": 1.2339327074382502, - "grad_norm": 3.9728519916534424, - "learning_rate": 2.943445487602916e-05, - "loss": 0.5863, - "step": 139580 - }, - { - "epoch": 1.2340211106985626, - "grad_norm": 2.939859628677368, - "learning_rate": 2.9432981488357292e-05, - "loss": 0.6287, - "step": 139590 - }, - { - "epoch": 1.2341095139588747, - "grad_norm": 1.3547147512435913, - "learning_rate": 2.9431508100685424e-05, - "loss": 0.6122, - "step": 139600 - }, - { - "epoch": 1.234197917219187, - "grad_norm": 2.903419256210327, - "learning_rate": 2.943003471301355e-05, - "loss": 0.6081, - "step": 139610 - }, - { - "epoch": 1.2342863204794994, - "grad_norm": 4.38590145111084, - "learning_rate": 2.942856132534168e-05, - "loss": 0.4389, - "step": 139620 - }, - { - "epoch": 1.2343747237398115, - "grad_norm": 7.4531450271606445, - "learning_rate": 2.9427087937669812e-05, - "loss": 0.5544, - "step": 139630 - }, - { - "epoch": 1.2344631270001238, - "grad_norm": 2.2751047611236572, - "learning_rate": 2.9425614549997937e-05, - "loss": 0.6129, - "step": 139640 - }, - { - "epoch": 1.234551530260436, - "grad_norm": 1.5383727550506592, - "learning_rate": 2.942414116232607e-05, - "loss": 0.5807, - "step": 139650 - }, - { - "epoch": 1.2346399335207483, - "grad_norm": 1.790018916130066, - "learning_rate": 2.9422667774654194e-05, - "loss": 0.5889, - "step": 139660 - }, - { - "epoch": 1.2347283367810604, - "grad_norm": 1.6726810932159424, - "learning_rate": 2.9421194386982326e-05, - "loss": 0.5862, - "step": 139670 - }, - { - "epoch": 1.2348167400413728, - "grad_norm": 2.0210378170013428, - "learning_rate": 2.9419720999310457e-05, - "loss": 0.6687, - "step": 139680 - }, - { - "epoch": 1.2349051433016849, - "grad_norm": 4.886565685272217, - "learning_rate": 2.9418247611638582e-05, - "loss": 0.5988, - "step": 139690 - }, - { - "epoch": 1.2349935465619972, - "grad_norm": 5.94719123840332, - "learning_rate": 2.9416774223966714e-05, - "loss": 0.6724, - "step": 139700 - }, - { - "epoch": 1.2350819498223093, - "grad_norm": 5.646327018737793, - "learning_rate": 2.9415300836294846e-05, - "loss": 0.5967, - "step": 139710 - }, - { - "epoch": 1.2351703530826217, - "grad_norm": 0.8480719327926636, - "learning_rate": 2.941382744862297e-05, - "loss": 0.6368, - "step": 139720 - }, - { - "epoch": 1.235258756342934, - "grad_norm": 7.830117702484131, - "learning_rate": 2.9412354060951102e-05, - "loss": 0.6109, - "step": 139730 - }, - { - "epoch": 1.2353471596032461, - "grad_norm": 4.0005784034729, - "learning_rate": 2.9410880673279234e-05, - "loss": 0.6374, - "step": 139740 - }, - { - "epoch": 1.2354355628635585, - "grad_norm": 4.897264003753662, - "learning_rate": 2.940940728560736e-05, - "loss": 0.4978, - "step": 139750 - }, - { - "epoch": 1.2355239661238706, - "grad_norm": 7.273678302764893, - "learning_rate": 2.940793389793549e-05, - "loss": 0.7556, - "step": 139760 - }, - { - "epoch": 1.235612369384183, - "grad_norm": 1.4680782556533813, - "learning_rate": 2.9406460510263623e-05, - "loss": 0.5497, - "step": 139770 - }, - { - "epoch": 1.235700772644495, - "grad_norm": 3.8397841453552246, - "learning_rate": 2.9404987122591748e-05, - "loss": 0.7207, - "step": 139780 - }, - { - "epoch": 1.2357891759048074, - "grad_norm": 4.973461627960205, - "learning_rate": 2.940351373491988e-05, - "loss": 0.6682, - "step": 139790 - }, - { - "epoch": 1.2358775791651195, - "grad_norm": 3.151339292526245, - "learning_rate": 2.9402040347248004e-05, - "loss": 0.6395, - "step": 139800 - }, - { - "epoch": 1.2359659824254319, - "grad_norm": 2.884575128555298, - "learning_rate": 2.9400566959576136e-05, - "loss": 0.6677, - "step": 139810 - }, - { - "epoch": 1.236054385685744, - "grad_norm": 6.017087936401367, - "learning_rate": 2.9399093571904268e-05, - "loss": 0.5366, - "step": 139820 - }, - { - "epoch": 1.2361427889460563, - "grad_norm": 4.820949077606201, - "learning_rate": 2.9397620184232393e-05, - "loss": 0.583, - "step": 139830 - }, - { - "epoch": 1.2362311922063687, - "grad_norm": 8.258111000061035, - "learning_rate": 2.9396146796560524e-05, - "loss": 0.6777, - "step": 139840 - }, - { - "epoch": 1.2363195954666808, - "grad_norm": 1.1991479396820068, - "learning_rate": 2.9394673408888656e-05, - "loss": 0.6433, - "step": 139850 - }, - { - "epoch": 1.2364079987269931, - "grad_norm": 2.9836766719818115, - "learning_rate": 2.939320002121678e-05, - "loss": 0.4588, - "step": 139860 - }, - { - "epoch": 1.2364964019873053, - "grad_norm": 3.943164587020874, - "learning_rate": 2.9391726633544913e-05, - "loss": 0.619, - "step": 139870 - }, - { - "epoch": 1.2365848052476176, - "grad_norm": 2.0488691329956055, - "learning_rate": 2.9390253245873045e-05, - "loss": 0.6212, - "step": 139880 - }, - { - "epoch": 1.2366732085079297, - "grad_norm": 2.4676930904388428, - "learning_rate": 2.938877985820117e-05, - "loss": 0.561, - "step": 139890 - }, - { - "epoch": 1.236761611768242, - "grad_norm": 1.7249491214752197, - "learning_rate": 2.93873064705293e-05, - "loss": 0.5581, - "step": 139900 - }, - { - "epoch": 1.2368500150285542, - "grad_norm": 1.536492109298706, - "learning_rate": 2.9385833082857426e-05, - "loss": 0.66, - "step": 139910 - }, - { - "epoch": 1.2369384182888665, - "grad_norm": 3.7124695777893066, - "learning_rate": 2.9384359695185558e-05, - "loss": 0.6974, - "step": 139920 - }, - { - "epoch": 1.2370268215491786, - "grad_norm": 2.430027484893799, - "learning_rate": 2.938288630751369e-05, - "loss": 0.6646, - "step": 139930 - }, - { - "epoch": 1.237115224809491, - "grad_norm": 5.068747043609619, - "learning_rate": 2.9381412919841815e-05, - "loss": 0.5946, - "step": 139940 - }, - { - "epoch": 1.2372036280698033, - "grad_norm": 5.979935169219971, - "learning_rate": 2.9379939532169947e-05, - "loss": 0.6908, - "step": 139950 - }, - { - "epoch": 1.2372920313301154, - "grad_norm": 6.684631824493408, - "learning_rate": 2.9378466144498078e-05, - "loss": 0.5682, - "step": 139960 - }, - { - "epoch": 1.2373804345904278, - "grad_norm": 3.413923740386963, - "learning_rate": 2.9376992756826203e-05, - "loss": 0.6468, - "step": 139970 - }, - { - "epoch": 1.23746883785074, - "grad_norm": 1.644879698753357, - "learning_rate": 2.9375519369154335e-05, - "loss": 0.6515, - "step": 139980 - }, - { - "epoch": 1.2375572411110523, - "grad_norm": 4.588129997253418, - "learning_rate": 2.9374045981482467e-05, - "loss": 0.563, - "step": 139990 - }, - { - "epoch": 1.2376456443713644, - "grad_norm": 3.4406423568725586, - "learning_rate": 2.937257259381059e-05, - "loss": 0.5147, - "step": 140000 - }, - { - "epoch": 1.2377340476316767, - "grad_norm": 5.868357181549072, - "learning_rate": 2.9371099206138723e-05, - "loss": 0.6511, - "step": 140010 - }, - { - "epoch": 1.2378224508919888, - "grad_norm": 1.119891881942749, - "learning_rate": 2.9369625818466852e-05, - "loss": 0.6221, - "step": 140020 - }, - { - "epoch": 1.2379108541523012, - "grad_norm": 4.095285415649414, - "learning_rate": 2.936815243079498e-05, - "loss": 0.5063, - "step": 140030 - }, - { - "epoch": 1.2379992574126133, - "grad_norm": 2.5871200561523438, - "learning_rate": 2.9366679043123112e-05, - "loss": 0.7259, - "step": 140040 - }, - { - "epoch": 1.2380876606729256, - "grad_norm": 1.1331309080123901, - "learning_rate": 2.936520565545124e-05, - "loss": 0.5898, - "step": 140050 - }, - { - "epoch": 1.238176063933238, - "grad_norm": 4.594869136810303, - "learning_rate": 2.936373226777937e-05, - "loss": 0.6123, - "step": 140060 - }, - { - "epoch": 1.23826446719355, - "grad_norm": 1.2060575485229492, - "learning_rate": 2.93622588801075e-05, - "loss": 0.5587, - "step": 140070 - }, - { - "epoch": 1.2383528704538622, - "grad_norm": 1.2319847345352173, - "learning_rate": 2.936078549243563e-05, - "loss": 0.6498, - "step": 140080 - }, - { - "epoch": 1.2384412737141746, - "grad_norm": 11.477744102478027, - "learning_rate": 2.9359312104763757e-05, - "loss": 0.5192, - "step": 140090 - }, - { - "epoch": 1.238529676974487, - "grad_norm": 7.122554779052734, - "learning_rate": 2.935783871709189e-05, - "loss": 0.6864, - "step": 140100 - }, - { - "epoch": 1.238618080234799, - "grad_norm": 4.3073835372924805, - "learning_rate": 2.9356365329420017e-05, - "loss": 0.5233, - "step": 140110 - }, - { - "epoch": 1.2387064834951114, - "grad_norm": 5.499136447906494, - "learning_rate": 2.9354891941748145e-05, - "loss": 0.5919, - "step": 140120 - }, - { - "epoch": 1.2387948867554235, - "grad_norm": 5.400546073913574, - "learning_rate": 2.9353418554076277e-05, - "loss": 0.56, - "step": 140130 - }, - { - "epoch": 1.2388832900157358, - "grad_norm": 1.5307656526565552, - "learning_rate": 2.9351945166404406e-05, - "loss": 0.6586, - "step": 140140 - }, - { - "epoch": 1.238971693276048, - "grad_norm": 3.108717918395996, - "learning_rate": 2.9350471778732534e-05, - "loss": 0.6399, - "step": 140150 - }, - { - "epoch": 1.2390600965363603, - "grad_norm": 2.895707130432129, - "learning_rate": 2.9348998391060662e-05, - "loss": 0.7124, - "step": 140160 - }, - { - "epoch": 1.2391484997966724, - "grad_norm": 1.0909459590911865, - "learning_rate": 2.9347525003388794e-05, - "loss": 0.6455, - "step": 140170 - }, - { - "epoch": 1.2392369030569848, - "grad_norm": 4.810318946838379, - "learning_rate": 2.9346051615716922e-05, - "loss": 0.6425, - "step": 140180 - }, - { - "epoch": 1.2393253063172969, - "grad_norm": 5.3630266189575195, - "learning_rate": 2.934457822804505e-05, - "loss": 0.6832, - "step": 140190 - }, - { - "epoch": 1.2394137095776092, - "grad_norm": 7.8332343101501465, - "learning_rate": 2.9343104840373182e-05, - "loss": 0.6905, - "step": 140200 - }, - { - "epoch": 1.2395021128379216, - "grad_norm": 2.4580328464508057, - "learning_rate": 2.934163145270131e-05, - "loss": 0.6283, - "step": 140210 - }, - { - "epoch": 1.2395905160982337, - "grad_norm": 3.7288308143615723, - "learning_rate": 2.934015806502944e-05, - "loss": 0.659, - "step": 140220 - }, - { - "epoch": 1.239678919358546, - "grad_norm": 5.179182052612305, - "learning_rate": 2.933868467735757e-05, - "loss": 0.5555, - "step": 140230 - }, - { - "epoch": 1.2397673226188581, - "grad_norm": 2.32548189163208, - "learning_rate": 2.9337211289685703e-05, - "loss": 0.6479, - "step": 140240 - }, - { - "epoch": 1.2398557258791705, - "grad_norm": 1.174964189529419, - "learning_rate": 2.9335737902013828e-05, - "loss": 0.6909, - "step": 140250 - }, - { - "epoch": 1.2399441291394826, - "grad_norm": 1.8876432180404663, - "learning_rate": 2.933426451434196e-05, - "loss": 0.6437, - "step": 140260 - }, - { - "epoch": 1.240032532399795, - "grad_norm": 9.983796119689941, - "learning_rate": 2.9332791126670084e-05, - "loss": 0.7188, - "step": 140270 - }, - { - "epoch": 1.240120935660107, - "grad_norm": 1.982649326324463, - "learning_rate": 2.9331317738998216e-05, - "loss": 0.5284, - "step": 140280 - }, - { - "epoch": 1.2402093389204194, - "grad_norm": 7.236152172088623, - "learning_rate": 2.9329844351326348e-05, - "loss": 0.6579, - "step": 140290 - }, - { - "epoch": 1.2402977421807315, - "grad_norm": 2.171541452407837, - "learning_rate": 2.9328370963654473e-05, - "loss": 0.6806, - "step": 140300 - }, - { - "epoch": 1.2403861454410439, - "grad_norm": 2.270296573638916, - "learning_rate": 2.9326897575982605e-05, - "loss": 0.6705, - "step": 140310 - }, - { - "epoch": 1.2404745487013562, - "grad_norm": 1.9734338521957397, - "learning_rate": 2.9325424188310736e-05, - "loss": 0.6387, - "step": 140320 - }, - { - "epoch": 1.2405629519616683, - "grad_norm": 4.599209785461426, - "learning_rate": 2.932395080063886e-05, - "loss": 0.5914, - "step": 140330 - }, - { - "epoch": 1.2406513552219807, - "grad_norm": 1.9192358255386353, - "learning_rate": 2.9322477412966993e-05, - "loss": 0.5906, - "step": 140340 - }, - { - "epoch": 1.2407397584822928, - "grad_norm": 4.300664901733398, - "learning_rate": 2.9321004025295125e-05, - "loss": 0.6245, - "step": 140350 - }, - { - "epoch": 1.2408281617426051, - "grad_norm": 5.689311504364014, - "learning_rate": 2.931953063762325e-05, - "loss": 0.5603, - "step": 140360 - }, - { - "epoch": 1.2409165650029172, - "grad_norm": 3.6298885345458984, - "learning_rate": 2.931805724995138e-05, - "loss": 0.6237, - "step": 140370 - }, - { - "epoch": 1.2410049682632296, - "grad_norm": 11.114352226257324, - "learning_rate": 2.9316583862279506e-05, - "loss": 0.6418, - "step": 140380 - }, - { - "epoch": 1.2410933715235417, - "grad_norm": 3.2634389400482178, - "learning_rate": 2.9315110474607638e-05, - "loss": 0.5905, - "step": 140390 - }, - { - "epoch": 1.241181774783854, - "grad_norm": 3.1169321537017822, - "learning_rate": 2.931363708693577e-05, - "loss": 0.6444, - "step": 140400 - }, - { - "epoch": 1.2412701780441662, - "grad_norm": 3.5170397758483887, - "learning_rate": 2.9312163699263895e-05, - "loss": 0.7171, - "step": 140410 - }, - { - "epoch": 1.2413585813044785, - "grad_norm": 5.625921726226807, - "learning_rate": 2.9310690311592027e-05, - "loss": 0.7638, - "step": 140420 - }, - { - "epoch": 1.2414469845647909, - "grad_norm": 17.05948829650879, - "learning_rate": 2.930921692392016e-05, - "loss": 0.6317, - "step": 140430 - }, - { - "epoch": 1.241535387825103, - "grad_norm": 2.29343843460083, - "learning_rate": 2.9307743536248283e-05, - "loss": 0.5793, - "step": 140440 - }, - { - "epoch": 1.2416237910854153, - "grad_norm": 1.8508127927780151, - "learning_rate": 2.9306270148576415e-05, - "loss": 0.7046, - "step": 140450 - }, - { - "epoch": 1.2417121943457274, - "grad_norm": 3.533745288848877, - "learning_rate": 2.9304796760904547e-05, - "loss": 0.6784, - "step": 140460 - }, - { - "epoch": 1.2418005976060398, - "grad_norm": 1.58856201171875, - "learning_rate": 2.9303323373232672e-05, - "loss": 0.6206, - "step": 140470 - }, - { - "epoch": 1.241889000866352, - "grad_norm": 3.449031352996826, - "learning_rate": 2.9301849985560803e-05, - "loss": 0.6174, - "step": 140480 - }, - { - "epoch": 1.2419774041266642, - "grad_norm": 1.543929100036621, - "learning_rate": 2.930037659788893e-05, - "loss": 0.6951, - "step": 140490 - }, - { - "epoch": 1.2420658073869764, - "grad_norm": 9.21243953704834, - "learning_rate": 2.929890321021706e-05, - "loss": 0.6359, - "step": 140500 - }, - { - "epoch": 1.2421542106472887, - "grad_norm": 9.06112003326416, - "learning_rate": 2.9297429822545192e-05, - "loss": 0.7242, - "step": 140510 - }, - { - "epoch": 1.2422426139076008, - "grad_norm": 3.261469841003418, - "learning_rate": 2.9295956434873317e-05, - "loss": 0.6628, - "step": 140520 - }, - { - "epoch": 1.2423310171679132, - "grad_norm": 1.5714235305786133, - "learning_rate": 2.929448304720145e-05, - "loss": 0.596, - "step": 140530 - }, - { - "epoch": 1.2424194204282255, - "grad_norm": 1.2306922674179077, - "learning_rate": 2.929300965952958e-05, - "loss": 0.5427, - "step": 140540 - }, - { - "epoch": 1.2425078236885376, - "grad_norm": 4.4726762771606445, - "learning_rate": 2.9291536271857705e-05, - "loss": 0.6596, - "step": 140550 - }, - { - "epoch": 1.24259622694885, - "grad_norm": 2.372318744659424, - "learning_rate": 2.9290062884185837e-05, - "loss": 0.5286, - "step": 140560 - }, - { - "epoch": 1.242684630209162, - "grad_norm": 1.2543840408325195, - "learning_rate": 2.928858949651397e-05, - "loss": 0.5122, - "step": 140570 - }, - { - "epoch": 1.2427730334694744, - "grad_norm": 4.93451452255249, - "learning_rate": 2.9287116108842094e-05, - "loss": 0.5446, - "step": 140580 - }, - { - "epoch": 1.2428614367297866, - "grad_norm": 1.7844778299331665, - "learning_rate": 2.9285642721170226e-05, - "loss": 0.6212, - "step": 140590 - }, - { - "epoch": 1.242949839990099, - "grad_norm": 9.284774780273438, - "learning_rate": 2.9284169333498357e-05, - "loss": 0.7217, - "step": 140600 - }, - { - "epoch": 1.243038243250411, - "grad_norm": 4.928479194641113, - "learning_rate": 2.9282695945826482e-05, - "loss": 0.5224, - "step": 140610 - }, - { - "epoch": 1.2431266465107234, - "grad_norm": 14.97335147857666, - "learning_rate": 2.9281222558154614e-05, - "loss": 0.5793, - "step": 140620 - }, - { - "epoch": 1.2432150497710355, - "grad_norm": 1.0068814754486084, - "learning_rate": 2.927974917048274e-05, - "loss": 0.5943, - "step": 140630 - }, - { - "epoch": 1.2433034530313478, - "grad_norm": 2.9845073223114014, - "learning_rate": 2.927827578281087e-05, - "loss": 0.5788, - "step": 140640 - }, - { - "epoch": 1.2433918562916602, - "grad_norm": 2.9338865280151367, - "learning_rate": 2.9276802395139002e-05, - "loss": 0.6163, - "step": 140650 - }, - { - "epoch": 1.2434802595519723, - "grad_norm": 9.645593643188477, - "learning_rate": 2.9275329007467127e-05, - "loss": 0.6994, - "step": 140660 - }, - { - "epoch": 1.2435686628122844, - "grad_norm": 2.7298977375030518, - "learning_rate": 2.927385561979526e-05, - "loss": 0.6851, - "step": 140670 - }, - { - "epoch": 1.2436570660725967, - "grad_norm": 3.164957046508789, - "learning_rate": 2.927238223212339e-05, - "loss": 0.5353, - "step": 140680 - }, - { - "epoch": 1.243745469332909, - "grad_norm": 3.9340643882751465, - "learning_rate": 2.9270908844451516e-05, - "loss": 0.5433, - "step": 140690 - }, - { - "epoch": 1.2438338725932212, - "grad_norm": 5.256089210510254, - "learning_rate": 2.9269435456779648e-05, - "loss": 0.5532, - "step": 140700 - }, - { - "epoch": 1.2439222758535335, - "grad_norm": 2.4819583892822266, - "learning_rate": 2.926796206910778e-05, - "loss": 0.5825, - "step": 140710 - }, - { - "epoch": 1.2440106791138457, - "grad_norm": 1.4068666696548462, - "learning_rate": 2.9266488681435904e-05, - "loss": 0.5838, - "step": 140720 - }, - { - "epoch": 1.244099082374158, - "grad_norm": 7.233332633972168, - "learning_rate": 2.9265015293764036e-05, - "loss": 0.6525, - "step": 140730 - }, - { - "epoch": 1.2441874856344701, - "grad_norm": 5.608917713165283, - "learning_rate": 2.926354190609216e-05, - "loss": 0.6391, - "step": 140740 - }, - { - "epoch": 1.2442758888947825, - "grad_norm": 6.583284378051758, - "learning_rate": 2.9262068518420293e-05, - "loss": 0.8408, - "step": 140750 - }, - { - "epoch": 1.2443642921550946, - "grad_norm": 1.630718469619751, - "learning_rate": 2.9260595130748425e-05, - "loss": 0.5601, - "step": 140760 - }, - { - "epoch": 1.244452695415407, - "grad_norm": 5.152973175048828, - "learning_rate": 2.925912174307655e-05, - "loss": 0.6414, - "step": 140770 - }, - { - "epoch": 1.244541098675719, - "grad_norm": 2.3453240394592285, - "learning_rate": 2.925764835540468e-05, - "loss": 0.6453, - "step": 140780 - }, - { - "epoch": 1.2446295019360314, - "grad_norm": 1.7918018102645874, - "learning_rate": 2.9256174967732813e-05, - "loss": 0.5182, - "step": 140790 - }, - { - "epoch": 1.2447179051963437, - "grad_norm": 1.9017199277877808, - "learning_rate": 2.9254701580060938e-05, - "loss": 0.5866, - "step": 140800 - }, - { - "epoch": 1.2448063084566559, - "grad_norm": 2.5929338932037354, - "learning_rate": 2.925322819238907e-05, - "loss": 0.6306, - "step": 140810 - }, - { - "epoch": 1.2448947117169682, - "grad_norm": 1.0340843200683594, - "learning_rate": 2.92517548047172e-05, - "loss": 0.6139, - "step": 140820 - }, - { - "epoch": 1.2449831149772803, - "grad_norm": 1.3462214469909668, - "learning_rate": 2.9250281417045326e-05, - "loss": 0.5743, - "step": 140830 - }, - { - "epoch": 1.2450715182375927, - "grad_norm": 2.510620355606079, - "learning_rate": 2.9248808029373458e-05, - "loss": 0.5134, - "step": 140840 - }, - { - "epoch": 1.2451599214979048, - "grad_norm": 5.207735061645508, - "learning_rate": 2.9247334641701583e-05, - "loss": 0.6167, - "step": 140850 - }, - { - "epoch": 1.2452483247582171, - "grad_norm": 4.856535911560059, - "learning_rate": 2.9245861254029715e-05, - "loss": 0.5485, - "step": 140860 - }, - { - "epoch": 1.2453367280185292, - "grad_norm": 6.803157806396484, - "learning_rate": 2.9244387866357847e-05, - "loss": 0.5408, - "step": 140870 - }, - { - "epoch": 1.2454251312788416, - "grad_norm": 3.1833581924438477, - "learning_rate": 2.924291447868597e-05, - "loss": 0.4767, - "step": 140880 - }, - { - "epoch": 1.2455135345391537, - "grad_norm": 2.0532584190368652, - "learning_rate": 2.9241441091014103e-05, - "loss": 0.6454, - "step": 140890 - }, - { - "epoch": 1.245601937799466, - "grad_norm": 2.4384188652038574, - "learning_rate": 2.9239967703342235e-05, - "loss": 0.6185, - "step": 140900 - }, - { - "epoch": 1.2456903410597784, - "grad_norm": 2.536409854888916, - "learning_rate": 2.923849431567036e-05, - "loss": 0.6473, - "step": 140910 - }, - { - "epoch": 1.2457787443200905, - "grad_norm": 1.9948673248291016, - "learning_rate": 2.9237020927998492e-05, - "loss": 0.6627, - "step": 140920 - }, - { - "epoch": 1.2458671475804028, - "grad_norm": 6.776634216308594, - "learning_rate": 2.9235547540326623e-05, - "loss": 0.7353, - "step": 140930 - }, - { - "epoch": 1.245955550840715, - "grad_norm": 2.3835225105285645, - "learning_rate": 2.9234074152654752e-05, - "loss": 0.6793, - "step": 140940 - }, - { - "epoch": 1.2460439541010273, - "grad_norm": 1.0225166082382202, - "learning_rate": 2.923260076498288e-05, - "loss": 0.4932, - "step": 140950 - }, - { - "epoch": 1.2461323573613394, - "grad_norm": 3.9867641925811768, - "learning_rate": 2.923112737731101e-05, - "loss": 0.6659, - "step": 140960 - }, - { - "epoch": 1.2462207606216518, - "grad_norm": 1.974141240119934, - "learning_rate": 2.922965398963914e-05, - "loss": 0.7315, - "step": 140970 - }, - { - "epoch": 1.2463091638819639, - "grad_norm": 1.6168317794799805, - "learning_rate": 2.922818060196727e-05, - "loss": 0.5722, - "step": 140980 - }, - { - "epoch": 1.2463975671422762, - "grad_norm": 4.553557872772217, - "learning_rate": 2.9226707214295397e-05, - "loss": 0.5784, - "step": 140990 - }, - { - "epoch": 1.2464859704025884, - "grad_norm": 1.275503158569336, - "learning_rate": 2.922523382662353e-05, - "loss": 0.4953, - "step": 141000 - }, - { - "epoch": 1.2465743736629007, - "grad_norm": 1.0491254329681396, - "learning_rate": 2.9223760438951657e-05, - "loss": 0.5979, - "step": 141010 - }, - { - "epoch": 1.246662776923213, - "grad_norm": 1.8186928033828735, - "learning_rate": 2.9222287051279785e-05, - "loss": 0.6265, - "step": 141020 - }, - { - "epoch": 1.2467511801835252, - "grad_norm": 2.4338583946228027, - "learning_rate": 2.9220813663607917e-05, - "loss": 0.6928, - "step": 141030 - }, - { - "epoch": 1.2468395834438375, - "grad_norm": 4.873175144195557, - "learning_rate": 2.9219340275936046e-05, - "loss": 0.6879, - "step": 141040 - }, - { - "epoch": 1.2469279867041496, - "grad_norm": 5.91003942489624, - "learning_rate": 2.9217866888264174e-05, - "loss": 0.6794, - "step": 141050 - }, - { - "epoch": 1.247016389964462, - "grad_norm": 1.8183811902999878, - "learning_rate": 2.9216393500592306e-05, - "loss": 0.5291, - "step": 141060 - }, - { - "epoch": 1.247104793224774, - "grad_norm": 4.114963531494141, - "learning_rate": 2.9214920112920434e-05, - "loss": 0.6011, - "step": 141070 - }, - { - "epoch": 1.2471931964850864, - "grad_norm": 3.7473528385162354, - "learning_rate": 2.9213446725248562e-05, - "loss": 0.6559, - "step": 141080 - }, - { - "epoch": 1.2472815997453985, - "grad_norm": 7.758160591125488, - "learning_rate": 2.9211973337576694e-05, - "loss": 0.6818, - "step": 141090 - }, - { - "epoch": 1.2473700030057109, - "grad_norm": 2.686952829360962, - "learning_rate": 2.921049994990482e-05, - "loss": 0.5258, - "step": 141100 - }, - { - "epoch": 1.247458406266023, - "grad_norm": 4.461240291595459, - "learning_rate": 2.920902656223295e-05, - "loss": 0.6901, - "step": 141110 - }, - { - "epoch": 1.2475468095263353, - "grad_norm": 5.235640525817871, - "learning_rate": 2.9207553174561083e-05, - "loss": 0.5542, - "step": 141120 - }, - { - "epoch": 1.2476352127866477, - "grad_norm": 2.567991256713867, - "learning_rate": 2.9206079786889207e-05, - "loss": 0.5565, - "step": 141130 - }, - { - "epoch": 1.2477236160469598, - "grad_norm": 2.196699857711792, - "learning_rate": 2.920460639921734e-05, - "loss": 0.5685, - "step": 141140 - }, - { - "epoch": 1.2478120193072721, - "grad_norm": 1.8159294128417969, - "learning_rate": 2.920313301154547e-05, - "loss": 0.7082, - "step": 141150 - }, - { - "epoch": 1.2479004225675843, - "grad_norm": 2.4379069805145264, - "learning_rate": 2.9201659623873596e-05, - "loss": 0.6524, - "step": 141160 - }, - { - "epoch": 1.2479888258278966, - "grad_norm": 6.3143181800842285, - "learning_rate": 2.9200186236201728e-05, - "loss": 0.581, - "step": 141170 - }, - { - "epoch": 1.2480772290882087, - "grad_norm": 1.6999142169952393, - "learning_rate": 2.919871284852986e-05, - "loss": 0.576, - "step": 141180 - }, - { - "epoch": 1.248165632348521, - "grad_norm": 4.524407863616943, - "learning_rate": 2.9197239460857984e-05, - "loss": 0.5335, - "step": 141190 - }, - { - "epoch": 1.2482540356088332, - "grad_norm": 1.083709716796875, - "learning_rate": 2.9195766073186116e-05, - "loss": 0.7418, - "step": 141200 - }, - { - "epoch": 1.2483424388691455, - "grad_norm": 11.910554885864258, - "learning_rate": 2.919429268551424e-05, - "loss": 0.5651, - "step": 141210 - }, - { - "epoch": 1.2484308421294577, - "grad_norm": 5.971522808074951, - "learning_rate": 2.9192819297842373e-05, - "loss": 0.6524, - "step": 141220 - }, - { - "epoch": 1.24851924538977, - "grad_norm": 8.171072006225586, - "learning_rate": 2.9191345910170505e-05, - "loss": 0.4948, - "step": 141230 - }, - { - "epoch": 1.2486076486500823, - "grad_norm": 6.670187473297119, - "learning_rate": 2.918987252249863e-05, - "loss": 0.6167, - "step": 141240 - }, - { - "epoch": 1.2486960519103945, - "grad_norm": 3.2850587368011475, - "learning_rate": 2.918839913482676e-05, - "loss": 0.7249, - "step": 141250 - }, - { - "epoch": 1.2487844551707066, - "grad_norm": 19.041240692138672, - "learning_rate": 2.9186925747154893e-05, - "loss": 0.6956, - "step": 141260 - }, - { - "epoch": 1.248872858431019, - "grad_norm": 2.8739800453186035, - "learning_rate": 2.9185452359483018e-05, - "loss": 0.6685, - "step": 141270 - }, - { - "epoch": 1.2489612616913313, - "grad_norm": 3.2773549556732178, - "learning_rate": 2.918397897181115e-05, - "loss": 0.6015, - "step": 141280 - }, - { - "epoch": 1.2490496649516434, - "grad_norm": 2.605332136154175, - "learning_rate": 2.918250558413928e-05, - "loss": 0.6365, - "step": 141290 - }, - { - "epoch": 1.2491380682119557, - "grad_norm": 10.375014305114746, - "learning_rate": 2.9181032196467406e-05, - "loss": 0.4976, - "step": 141300 - }, - { - "epoch": 1.2492264714722678, - "grad_norm": 2.152941942214966, - "learning_rate": 2.9179558808795538e-05, - "loss": 0.5866, - "step": 141310 - }, - { - "epoch": 1.2493148747325802, - "grad_norm": 4.143291473388672, - "learning_rate": 2.9178085421123663e-05, - "loss": 0.6137, - "step": 141320 - }, - { - "epoch": 1.2494032779928923, - "grad_norm": 2.2320897579193115, - "learning_rate": 2.9176612033451795e-05, - "loss": 0.5502, - "step": 141330 - }, - { - "epoch": 1.2494916812532046, - "grad_norm": 2.5040457248687744, - "learning_rate": 2.9175138645779927e-05, - "loss": 0.695, - "step": 141340 - }, - { - "epoch": 1.2495800845135168, - "grad_norm": 2.3700103759765625, - "learning_rate": 2.917366525810805e-05, - "loss": 0.7287, - "step": 141350 - }, - { - "epoch": 1.249668487773829, - "grad_norm": 3.8010458946228027, - "learning_rate": 2.9172191870436183e-05, - "loss": 0.5645, - "step": 141360 - }, - { - "epoch": 1.2497568910341412, - "grad_norm": 2.8290529251098633, - "learning_rate": 2.9170718482764315e-05, - "loss": 0.5873, - "step": 141370 - }, - { - "epoch": 1.2498452942944536, - "grad_norm": 5.750149726867676, - "learning_rate": 2.916924509509244e-05, - "loss": 0.5435, - "step": 141380 - }, - { - "epoch": 1.249933697554766, - "grad_norm": 1.9185670614242554, - "learning_rate": 2.9167771707420572e-05, - "loss": 0.5686, - "step": 141390 - }, - { - "epoch": 1.250022100815078, - "grad_norm": 2.6748223304748535, - "learning_rate": 2.9166298319748704e-05, - "loss": 0.5652, - "step": 141400 - }, - { - "epoch": 1.2501105040753904, - "grad_norm": 2.3826236724853516, - "learning_rate": 2.916482493207683e-05, - "loss": 0.7153, - "step": 141410 - }, - { - "epoch": 1.2501989073357025, - "grad_norm": 1.8094279766082764, - "learning_rate": 2.916335154440496e-05, - "loss": 0.5242, - "step": 141420 - }, - { - "epoch": 1.2502873105960148, - "grad_norm": 1.37126624584198, - "learning_rate": 2.9161878156733085e-05, - "loss": 0.6647, - "step": 141430 - }, - { - "epoch": 1.250375713856327, - "grad_norm": 2.8650171756744385, - "learning_rate": 2.9160404769061217e-05, - "loss": 0.6869, - "step": 141440 - }, - { - "epoch": 1.2504641171166393, - "grad_norm": 1.3624227046966553, - "learning_rate": 2.915893138138935e-05, - "loss": 0.498, - "step": 141450 - }, - { - "epoch": 1.2505525203769516, - "grad_norm": 1.9418193101882935, - "learning_rate": 2.9157457993717474e-05, - "loss": 0.6451, - "step": 141460 - }, - { - "epoch": 1.2506409236372638, - "grad_norm": 1.5357246398925781, - "learning_rate": 2.9155984606045605e-05, - "loss": 0.5752, - "step": 141470 - }, - { - "epoch": 1.2507293268975759, - "grad_norm": 5.975726127624512, - "learning_rate": 2.9154511218373737e-05, - "loss": 0.4895, - "step": 141480 - }, - { - "epoch": 1.2508177301578882, - "grad_norm": 7.569196701049805, - "learning_rate": 2.9153037830701862e-05, - "loss": 0.6037, - "step": 141490 - }, - { - "epoch": 1.2509061334182006, - "grad_norm": 5.183529853820801, - "learning_rate": 2.9151564443029994e-05, - "loss": 0.618, - "step": 141500 - }, - { - "epoch": 1.2509945366785127, - "grad_norm": 2.297191619873047, - "learning_rate": 2.9150091055358126e-05, - "loss": 0.5857, - "step": 141510 - }, - { - "epoch": 1.251082939938825, - "grad_norm": 3.5654499530792236, - "learning_rate": 2.914861766768625e-05, - "loss": 0.6124, - "step": 141520 - }, - { - "epoch": 1.2511713431991371, - "grad_norm": 1.1095050573349, - "learning_rate": 2.9147144280014382e-05, - "loss": 0.5371, - "step": 141530 - }, - { - "epoch": 1.2512597464594495, - "grad_norm": 4.851041316986084, - "learning_rate": 2.9145670892342514e-05, - "loss": 0.5768, - "step": 141540 - }, - { - "epoch": 1.2513481497197616, - "grad_norm": 1.8972887992858887, - "learning_rate": 2.914419750467064e-05, - "loss": 0.6216, - "step": 141550 - }, - { - "epoch": 1.251436552980074, - "grad_norm": 3.0215260982513428, - "learning_rate": 2.914272411699877e-05, - "loss": 0.7099, - "step": 141560 - }, - { - "epoch": 1.251524956240386, - "grad_norm": 3.6639623641967773, - "learning_rate": 2.9141250729326896e-05, - "loss": 0.6488, - "step": 141570 - }, - { - "epoch": 1.2516133595006984, - "grad_norm": 9.585492134094238, - "learning_rate": 2.9139777341655027e-05, - "loss": 0.4569, - "step": 141580 - }, - { - "epoch": 1.2517017627610105, - "grad_norm": 2.5919620990753174, - "learning_rate": 2.913830395398316e-05, - "loss": 0.5874, - "step": 141590 - }, - { - "epoch": 1.2517901660213229, - "grad_norm": 5.106435775756836, - "learning_rate": 2.9136830566311284e-05, - "loss": 0.6638, - "step": 141600 - }, - { - "epoch": 1.2518785692816352, - "grad_norm": 8.585777282714844, - "learning_rate": 2.9135357178639416e-05, - "loss": 0.6941, - "step": 141610 - }, - { - "epoch": 1.2519669725419473, - "grad_norm": 3.21549129486084, - "learning_rate": 2.9133883790967548e-05, - "loss": 0.5557, - "step": 141620 - }, - { - "epoch": 1.2520553758022595, - "grad_norm": 19.117788314819336, - "learning_rate": 2.9132410403295673e-05, - "loss": 0.6647, - "step": 141630 - }, - { - "epoch": 1.2521437790625718, - "grad_norm": 2.573091983795166, - "learning_rate": 2.9130937015623804e-05, - "loss": 0.684, - "step": 141640 - }, - { - "epoch": 1.2522321823228841, - "grad_norm": 4.657829284667969, - "learning_rate": 2.9129463627951936e-05, - "loss": 0.5427, - "step": 141650 - }, - { - "epoch": 1.2523205855831963, - "grad_norm": 2.9532501697540283, - "learning_rate": 2.912799024028006e-05, - "loss": 0.5121, - "step": 141660 - }, - { - "epoch": 1.2524089888435086, - "grad_norm": 1.4169063568115234, - "learning_rate": 2.9126516852608193e-05, - "loss": 0.5873, - "step": 141670 - }, - { - "epoch": 1.2524973921038207, - "grad_norm": 21.120031356811523, - "learning_rate": 2.9125043464936318e-05, - "loss": 0.6757, - "step": 141680 - }, - { - "epoch": 1.252585795364133, - "grad_norm": 1.2934825420379639, - "learning_rate": 2.912357007726445e-05, - "loss": 0.5375, - "step": 141690 - }, - { - "epoch": 1.2526741986244452, - "grad_norm": 11.707215309143066, - "learning_rate": 2.912209668959258e-05, - "loss": 0.6225, - "step": 141700 - }, - { - "epoch": 1.2527626018847575, - "grad_norm": 3.3010013103485107, - "learning_rate": 2.9120623301920706e-05, - "loss": 0.4932, - "step": 141710 - }, - { - "epoch": 1.2528510051450699, - "grad_norm": 7.640185356140137, - "learning_rate": 2.9119149914248838e-05, - "loss": 0.6269, - "step": 141720 - }, - { - "epoch": 1.252939408405382, - "grad_norm": 2.6844773292541504, - "learning_rate": 2.911767652657697e-05, - "loss": 0.7, - "step": 141730 - }, - { - "epoch": 1.253027811665694, - "grad_norm": 4.77592658996582, - "learning_rate": 2.9116203138905095e-05, - "loss": 0.613, - "step": 141740 - }, - { - "epoch": 1.2531162149260064, - "grad_norm": 2.815992593765259, - "learning_rate": 2.9114729751233226e-05, - "loss": 0.6259, - "step": 141750 - }, - { - "epoch": 1.2532046181863188, - "grad_norm": 3.3699681758880615, - "learning_rate": 2.9113256363561358e-05, - "loss": 0.6528, - "step": 141760 - }, - { - "epoch": 1.253293021446631, - "grad_norm": 1.2847920656204224, - "learning_rate": 2.9111782975889483e-05, - "loss": 0.6925, - "step": 141770 - }, - { - "epoch": 1.2533814247069432, - "grad_norm": 9.219353675842285, - "learning_rate": 2.9110309588217615e-05, - "loss": 0.593, - "step": 141780 - }, - { - "epoch": 1.2534698279672554, - "grad_norm": 17.176612854003906, - "learning_rate": 2.9108836200545743e-05, - "loss": 0.6063, - "step": 141790 - }, - { - "epoch": 1.2535582312275677, - "grad_norm": 4.702795505523682, - "learning_rate": 2.910736281287387e-05, - "loss": 0.5646, - "step": 141800 - }, - { - "epoch": 1.2536466344878798, - "grad_norm": 9.503539085388184, - "learning_rate": 2.9105889425202003e-05, - "loss": 0.759, - "step": 141810 - }, - { - "epoch": 1.2537350377481922, - "grad_norm": 1.0335949659347534, - "learning_rate": 2.910441603753013e-05, - "loss": 0.5143, - "step": 141820 - }, - { - "epoch": 1.2538234410085045, - "grad_norm": 11.666600227355957, - "learning_rate": 2.910294264985826e-05, - "loss": 0.6094, - "step": 141830 - }, - { - "epoch": 1.2539118442688166, - "grad_norm": 5.775475025177002, - "learning_rate": 2.9101469262186392e-05, - "loss": 0.6613, - "step": 141840 - }, - { - "epoch": 1.2540002475291288, - "grad_norm": 2.63454270362854, - "learning_rate": 2.909999587451452e-05, - "loss": 0.536, - "step": 141850 - }, - { - "epoch": 1.254088650789441, - "grad_norm": 2.5589218139648438, - "learning_rate": 2.909852248684265e-05, - "loss": 0.5766, - "step": 141860 - }, - { - "epoch": 1.2541770540497534, - "grad_norm": 3.598965883255005, - "learning_rate": 2.909704909917078e-05, - "loss": 0.6469, - "step": 141870 - }, - { - "epoch": 1.2542654573100656, - "grad_norm": 5.1218180656433105, - "learning_rate": 2.909557571149891e-05, - "loss": 0.49, - "step": 141880 - }, - { - "epoch": 1.254353860570378, - "grad_norm": 6.656684398651123, - "learning_rate": 2.9094102323827037e-05, - "loss": 0.6965, - "step": 141890 - }, - { - "epoch": 1.25444226383069, - "grad_norm": 4.330774784088135, - "learning_rate": 2.9092628936155165e-05, - "loss": 0.6882, - "step": 141900 - }, - { - "epoch": 1.2545306670910024, - "grad_norm": 1.1962385177612305, - "learning_rate": 2.9091155548483297e-05, - "loss": 0.6487, - "step": 141910 - }, - { - "epoch": 1.2546190703513145, - "grad_norm": 5.509335517883301, - "learning_rate": 2.9089682160811425e-05, - "loss": 0.6785, - "step": 141920 - }, - { - "epoch": 1.2547074736116268, - "grad_norm": 6.97117805480957, - "learning_rate": 2.9088208773139554e-05, - "loss": 0.4744, - "step": 141930 - }, - { - "epoch": 1.2547958768719392, - "grad_norm": 1.8010069131851196, - "learning_rate": 2.9086735385467685e-05, - "loss": 0.6192, - "step": 141940 - }, - { - "epoch": 1.2548842801322513, - "grad_norm": 2.195843458175659, - "learning_rate": 2.9085261997795814e-05, - "loss": 0.6447, - "step": 141950 - }, - { - "epoch": 1.2549726833925634, - "grad_norm": 1.7158185243606567, - "learning_rate": 2.9083788610123942e-05, - "loss": 0.6275, - "step": 141960 - }, - { - "epoch": 1.2550610866528757, - "grad_norm": 4.613834381103516, - "learning_rate": 2.9082315222452074e-05, - "loss": 0.6075, - "step": 141970 - }, - { - "epoch": 1.255149489913188, - "grad_norm": 5.494673728942871, - "learning_rate": 2.9080841834780202e-05, - "loss": 0.5105, - "step": 141980 - }, - { - "epoch": 1.2552378931735002, - "grad_norm": 2.8348162174224854, - "learning_rate": 2.907936844710833e-05, - "loss": 0.6561, - "step": 141990 - }, - { - "epoch": 1.2553262964338125, - "grad_norm": 1.4355186223983765, - "learning_rate": 2.9077895059436462e-05, - "loss": 0.6661, - "step": 142000 - }, - { - "epoch": 1.2554146996941247, - "grad_norm": 2.367457389831543, - "learning_rate": 2.907642167176459e-05, - "loss": 0.6743, - "step": 142010 - }, - { - "epoch": 1.255503102954437, - "grad_norm": 2.9533989429473877, - "learning_rate": 2.907494828409272e-05, - "loss": 0.4789, - "step": 142020 - }, - { - "epoch": 1.2555915062147491, - "grad_norm": 2.8348333835601807, - "learning_rate": 2.907347489642085e-05, - "loss": 0.6829, - "step": 142030 - }, - { - "epoch": 1.2556799094750615, - "grad_norm": 1.6097936630249023, - "learning_rate": 2.9072001508748976e-05, - "loss": 0.4533, - "step": 142040 - }, - { - "epoch": 1.2557683127353738, - "grad_norm": 9.160196304321289, - "learning_rate": 2.9070528121077108e-05, - "loss": 0.5649, - "step": 142050 - }, - { - "epoch": 1.255856715995686, - "grad_norm": 2.1227221488952637, - "learning_rate": 2.906905473340524e-05, - "loss": 0.679, - "step": 142060 - }, - { - "epoch": 1.255945119255998, - "grad_norm": 4.315895080566406, - "learning_rate": 2.9067581345733364e-05, - "loss": 0.652, - "step": 142070 - }, - { - "epoch": 1.2560335225163104, - "grad_norm": 5.385100364685059, - "learning_rate": 2.9066107958061496e-05, - "loss": 0.4854, - "step": 142080 - }, - { - "epoch": 1.2561219257766227, - "grad_norm": 1.7082422971725464, - "learning_rate": 2.9064634570389628e-05, - "loss": 0.734, - "step": 142090 - }, - { - "epoch": 1.2562103290369349, - "grad_norm": 3.230713367462158, - "learning_rate": 2.9063161182717753e-05, - "loss": 0.7296, - "step": 142100 - }, - { - "epoch": 1.2562987322972472, - "grad_norm": 1.7252577543258667, - "learning_rate": 2.9061687795045884e-05, - "loss": 0.6799, - "step": 142110 - }, - { - "epoch": 1.2563871355575593, - "grad_norm": 2.9703972339630127, - "learning_rate": 2.9060214407374016e-05, - "loss": 0.6039, - "step": 142120 - }, - { - "epoch": 1.2564755388178717, - "grad_norm": 2.5418121814727783, - "learning_rate": 2.905874101970214e-05, - "loss": 0.4859, - "step": 142130 - }, - { - "epoch": 1.2565639420781838, - "grad_norm": 4.017408847808838, - "learning_rate": 2.9057267632030273e-05, - "loss": 0.6434, - "step": 142140 - }, - { - "epoch": 1.2566523453384961, - "grad_norm": 3.0217642784118652, - "learning_rate": 2.9055794244358398e-05, - "loss": 0.6172, - "step": 142150 - }, - { - "epoch": 1.2567407485988082, - "grad_norm": 2.3579957485198975, - "learning_rate": 2.905432085668653e-05, - "loss": 0.6518, - "step": 142160 - }, - { - "epoch": 1.2568291518591206, - "grad_norm": 6.132658004760742, - "learning_rate": 2.905284746901466e-05, - "loss": 0.5344, - "step": 142170 - }, - { - "epoch": 1.2569175551194327, - "grad_norm": 2.83100962638855, - "learning_rate": 2.9051374081342786e-05, - "loss": 0.7732, - "step": 142180 - }, - { - "epoch": 1.257005958379745, - "grad_norm": 1.8778926134109497, - "learning_rate": 2.9049900693670918e-05, - "loss": 0.6001, - "step": 142190 - }, - { - "epoch": 1.2570943616400574, - "grad_norm": 2.402885913848877, - "learning_rate": 2.904842730599905e-05, - "loss": 0.5186, - "step": 142200 - }, - { - "epoch": 1.2571827649003695, - "grad_norm": 1.9606581926345825, - "learning_rate": 2.9046953918327175e-05, - "loss": 0.5075, - "step": 142210 - }, - { - "epoch": 1.2572711681606816, - "grad_norm": 6.555304050445557, - "learning_rate": 2.9045480530655306e-05, - "loss": 0.5144, - "step": 142220 - }, - { - "epoch": 1.257359571420994, - "grad_norm": 3.139082908630371, - "learning_rate": 2.9044007142983438e-05, - "loss": 0.6225, - "step": 142230 - }, - { - "epoch": 1.2574479746813063, - "grad_norm": 2.987534761428833, - "learning_rate": 2.9042533755311563e-05, - "loss": 0.4837, - "step": 142240 - }, - { - "epoch": 1.2575363779416184, - "grad_norm": 1.3827580213546753, - "learning_rate": 2.9041060367639695e-05, - "loss": 0.6732, - "step": 142250 - }, - { - "epoch": 1.2576247812019308, - "grad_norm": 4.690704822540283, - "learning_rate": 2.903958697996782e-05, - "loss": 0.6898, - "step": 142260 - }, - { - "epoch": 1.257713184462243, - "grad_norm": 3.160564661026001, - "learning_rate": 2.903811359229595e-05, - "loss": 0.7037, - "step": 142270 - }, - { - "epoch": 1.2578015877225552, - "grad_norm": 4.473592758178711, - "learning_rate": 2.9036640204624083e-05, - "loss": 0.6937, - "step": 142280 - }, - { - "epoch": 1.2578899909828674, - "grad_norm": 4.894199848175049, - "learning_rate": 2.903516681695221e-05, - "loss": 0.6031, - "step": 142290 - }, - { - "epoch": 1.2579783942431797, - "grad_norm": 1.4990565776824951, - "learning_rate": 2.903369342928034e-05, - "loss": 0.5938, - "step": 142300 - }, - { - "epoch": 1.258066797503492, - "grad_norm": 1.4152413606643677, - "learning_rate": 2.9032220041608472e-05, - "loss": 0.5592, - "step": 142310 - }, - { - "epoch": 1.2581552007638042, - "grad_norm": 0.9226208329200745, - "learning_rate": 2.9030746653936597e-05, - "loss": 0.6542, - "step": 142320 - }, - { - "epoch": 1.2582436040241163, - "grad_norm": 3.8320677280426025, - "learning_rate": 2.902927326626473e-05, - "loss": 0.6514, - "step": 142330 - }, - { - "epoch": 1.2583320072844286, - "grad_norm": 2.6828713417053223, - "learning_rate": 2.902779987859286e-05, - "loss": 0.6364, - "step": 142340 - }, - { - "epoch": 1.258420410544741, - "grad_norm": 8.254096031188965, - "learning_rate": 2.9026326490920985e-05, - "loss": 0.5871, - "step": 142350 - }, - { - "epoch": 1.258508813805053, - "grad_norm": 2.9420909881591797, - "learning_rate": 2.9024853103249117e-05, - "loss": 0.6058, - "step": 142360 - }, - { - "epoch": 1.2585972170653654, - "grad_norm": 7.3002800941467285, - "learning_rate": 2.9023379715577242e-05, - "loss": 0.6542, - "step": 142370 - }, - { - "epoch": 1.2586856203256775, - "grad_norm": 4.738195419311523, - "learning_rate": 2.9021906327905374e-05, - "loss": 0.6273, - "step": 142380 - }, - { - "epoch": 1.2587740235859899, - "grad_norm": 5.579693794250488, - "learning_rate": 2.9020432940233505e-05, - "loss": 0.5985, - "step": 142390 - }, - { - "epoch": 1.258862426846302, - "grad_norm": 3.8817455768585205, - "learning_rate": 2.901895955256163e-05, - "loss": 0.5406, - "step": 142400 - }, - { - "epoch": 1.2589508301066143, - "grad_norm": 11.041511535644531, - "learning_rate": 2.9017486164889762e-05, - "loss": 0.5037, - "step": 142410 - }, - { - "epoch": 1.2590392333669267, - "grad_norm": 8.328207015991211, - "learning_rate": 2.9016012777217894e-05, - "loss": 0.5755, - "step": 142420 - }, - { - "epoch": 1.2591276366272388, - "grad_norm": 2.8297994136810303, - "learning_rate": 2.901453938954602e-05, - "loss": 0.678, - "step": 142430 - }, - { - "epoch": 1.259216039887551, - "grad_norm": 2.941640615463257, - "learning_rate": 2.901306600187415e-05, - "loss": 0.7165, - "step": 142440 - }, - { - "epoch": 1.2593044431478633, - "grad_norm": 6.018679618835449, - "learning_rate": 2.9011592614202282e-05, - "loss": 0.5531, - "step": 142450 - }, - { - "epoch": 1.2593928464081756, - "grad_norm": 0.9858852624893188, - "learning_rate": 2.9010119226530407e-05, - "loss": 0.5502, - "step": 142460 - }, - { - "epoch": 1.2594812496684877, - "grad_norm": 3.667024612426758, - "learning_rate": 2.900864583885854e-05, - "loss": 0.5949, - "step": 142470 - }, - { - "epoch": 1.2595696529288, - "grad_norm": 1.5842547416687012, - "learning_rate": 2.900717245118667e-05, - "loss": 0.6418, - "step": 142480 - }, - { - "epoch": 1.2596580561891122, - "grad_norm": 2.1449875831604004, - "learning_rate": 2.9005699063514796e-05, - "loss": 0.5484, - "step": 142490 - }, - { - "epoch": 1.2597464594494245, - "grad_norm": 1.6487674713134766, - "learning_rate": 2.9004225675842928e-05, - "loss": 0.6449, - "step": 142500 - }, - { - "epoch": 1.2598348627097367, - "grad_norm": 1.6574065685272217, - "learning_rate": 2.9002752288171052e-05, - "loss": 0.5596, - "step": 142510 - }, - { - "epoch": 1.259923265970049, - "grad_norm": 2.4455056190490723, - "learning_rate": 2.9001278900499184e-05, - "loss": 0.6108, - "step": 142520 - }, - { - "epoch": 1.2600116692303613, - "grad_norm": 9.824710845947266, - "learning_rate": 2.8999805512827316e-05, - "loss": 0.6233, - "step": 142530 - }, - { - "epoch": 1.2601000724906735, - "grad_norm": 5.204301834106445, - "learning_rate": 2.899833212515544e-05, - "loss": 0.6731, - "step": 142540 - }, - { - "epoch": 1.2601884757509856, - "grad_norm": 3.536147356033325, - "learning_rate": 2.8996858737483573e-05, - "loss": 0.6073, - "step": 142550 - }, - { - "epoch": 1.260276879011298, - "grad_norm": 3.7649152278900146, - "learning_rate": 2.8995385349811704e-05, - "loss": 0.6917, - "step": 142560 - }, - { - "epoch": 1.2603652822716103, - "grad_norm": 3.116579532623291, - "learning_rate": 2.899391196213983e-05, - "loss": 0.6835, - "step": 142570 - }, - { - "epoch": 1.2604536855319224, - "grad_norm": 13.381340026855469, - "learning_rate": 2.899243857446796e-05, - "loss": 0.5467, - "step": 142580 - }, - { - "epoch": 1.2605420887922347, - "grad_norm": 2.4020488262176514, - "learning_rate": 2.8990965186796093e-05, - "loss": 0.6804, - "step": 142590 - }, - { - "epoch": 1.2606304920525468, - "grad_norm": 1.091081976890564, - "learning_rate": 2.8989491799124218e-05, - "loss": 0.6525, - "step": 142600 - }, - { - "epoch": 1.2607188953128592, - "grad_norm": 2.0964810848236084, - "learning_rate": 2.898801841145235e-05, - "loss": 0.6748, - "step": 142610 - }, - { - "epoch": 1.2608072985731713, - "grad_norm": 1.5391497611999512, - "learning_rate": 2.8986545023780475e-05, - "loss": 0.6673, - "step": 142620 - }, - { - "epoch": 1.2608957018334837, - "grad_norm": 1.1725083589553833, - "learning_rate": 2.8985071636108606e-05, - "loss": 0.489, - "step": 142630 - }, - { - "epoch": 1.260984105093796, - "grad_norm": 8.688754081726074, - "learning_rate": 2.8983598248436738e-05, - "loss": 0.5855, - "step": 142640 - }, - { - "epoch": 1.2610725083541081, - "grad_norm": 2.642036199569702, - "learning_rate": 2.8982124860764863e-05, - "loss": 0.6322, - "step": 142650 - }, - { - "epoch": 1.2611609116144202, - "grad_norm": 2.2940332889556885, - "learning_rate": 2.8980651473092995e-05, - "loss": 0.5994, - "step": 142660 - }, - { - "epoch": 1.2612493148747326, - "grad_norm": 2.187880277633667, - "learning_rate": 2.8979178085421126e-05, - "loss": 0.642, - "step": 142670 - }, - { - "epoch": 1.261337718135045, - "grad_norm": 2.172506332397461, - "learning_rate": 2.897770469774925e-05, - "loss": 0.5701, - "step": 142680 - }, - { - "epoch": 1.261426121395357, - "grad_norm": 1.471436858177185, - "learning_rate": 2.8976231310077383e-05, - "loss": 0.6288, - "step": 142690 - }, - { - "epoch": 1.2615145246556694, - "grad_norm": 1.1443840265274048, - "learning_rate": 2.8974757922405515e-05, - "loss": 0.6768, - "step": 142700 - }, - { - "epoch": 1.2616029279159815, - "grad_norm": 2.6344046592712402, - "learning_rate": 2.897328453473364e-05, - "loss": 0.6899, - "step": 142710 - }, - { - "epoch": 1.2616913311762938, - "grad_norm": 9.553025245666504, - "learning_rate": 2.897181114706177e-05, - "loss": 0.7157, - "step": 142720 - }, - { - "epoch": 1.261779734436606, - "grad_norm": 2.090665340423584, - "learning_rate": 2.89703377593899e-05, - "loss": 0.5796, - "step": 142730 - }, - { - "epoch": 1.2618681376969183, - "grad_norm": 8.538515090942383, - "learning_rate": 2.896886437171803e-05, - "loss": 0.5682, - "step": 142740 - }, - { - "epoch": 1.2619565409572304, - "grad_norm": 7.142655372619629, - "learning_rate": 2.896739098404616e-05, - "loss": 0.6292, - "step": 142750 - }, - { - "epoch": 1.2620449442175428, - "grad_norm": 1.1068390607833862, - "learning_rate": 2.896591759637429e-05, - "loss": 0.5977, - "step": 142760 - }, - { - "epoch": 1.2621333474778549, - "grad_norm": 4.276402473449707, - "learning_rate": 2.8964444208702417e-05, - "loss": 0.6011, - "step": 142770 - }, - { - "epoch": 1.2622217507381672, - "grad_norm": 5.4022417068481445, - "learning_rate": 2.896297082103055e-05, - "loss": 0.6588, - "step": 142780 - }, - { - "epoch": 1.2623101539984796, - "grad_norm": 5.604693412780762, - "learning_rate": 2.8961497433358677e-05, - "loss": 0.6636, - "step": 142790 - }, - { - "epoch": 1.2623985572587917, - "grad_norm": 1.653070092201233, - "learning_rate": 2.8960024045686805e-05, - "loss": 0.5639, - "step": 142800 - }, - { - "epoch": 1.2624869605191038, - "grad_norm": 3.6620776653289795, - "learning_rate": 2.8958550658014937e-05, - "loss": 0.7108, - "step": 142810 - }, - { - "epoch": 1.2625753637794161, - "grad_norm": 6.846787929534912, - "learning_rate": 2.8957077270343065e-05, - "loss": 0.6007, - "step": 142820 - }, - { - "epoch": 1.2626637670397285, - "grad_norm": 2.0712146759033203, - "learning_rate": 2.8955603882671194e-05, - "loss": 0.6024, - "step": 142830 - }, - { - "epoch": 1.2627521703000406, - "grad_norm": 7.8736419677734375, - "learning_rate": 2.8954130494999325e-05, - "loss": 0.5729, - "step": 142840 - }, - { - "epoch": 1.262840573560353, - "grad_norm": 4.709074020385742, - "learning_rate": 2.8952657107327454e-05, - "loss": 0.5712, - "step": 142850 - }, - { - "epoch": 1.262928976820665, - "grad_norm": 1.5062496662139893, - "learning_rate": 2.8951183719655582e-05, - "loss": 0.6204, - "step": 142860 - }, - { - "epoch": 1.2630173800809774, - "grad_norm": 2.3152246475219727, - "learning_rate": 2.894971033198371e-05, - "loss": 0.6111, - "step": 142870 - }, - { - "epoch": 1.2631057833412895, - "grad_norm": 2.668691396713257, - "learning_rate": 2.8948236944311842e-05, - "loss": 0.683, - "step": 142880 - }, - { - "epoch": 1.2631941866016019, - "grad_norm": 4.096856117248535, - "learning_rate": 2.894676355663997e-05, - "loss": 0.6052, - "step": 142890 - }, - { - "epoch": 1.2632825898619142, - "grad_norm": 4.328186511993408, - "learning_rate": 2.89452901689681e-05, - "loss": 0.6021, - "step": 142900 - }, - { - "epoch": 1.2633709931222263, - "grad_norm": 3.0060596466064453, - "learning_rate": 2.894381678129623e-05, - "loss": 0.5609, - "step": 142910 - }, - { - "epoch": 1.2634593963825385, - "grad_norm": 2.2473089694976807, - "learning_rate": 2.894234339362436e-05, - "loss": 0.5002, - "step": 142920 - }, - { - "epoch": 1.2635477996428508, - "grad_norm": 13.181225776672363, - "learning_rate": 2.8940870005952487e-05, - "loss": 0.6276, - "step": 142930 - }, - { - "epoch": 1.2636362029031631, - "grad_norm": 1.4006301164627075, - "learning_rate": 2.893939661828062e-05, - "loss": 0.6133, - "step": 142940 - }, - { - "epoch": 1.2637246061634753, - "grad_norm": 1.669376254081726, - "learning_rate": 2.8937923230608747e-05, - "loss": 0.5652, - "step": 142950 - }, - { - "epoch": 1.2638130094237876, - "grad_norm": 3.2969346046447754, - "learning_rate": 2.8936449842936876e-05, - "loss": 0.5649, - "step": 142960 - }, - { - "epoch": 1.2639014126840997, - "grad_norm": 1.5077687501907349, - "learning_rate": 2.8934976455265008e-05, - "loss": 0.6012, - "step": 142970 - }, - { - "epoch": 1.263989815944412, - "grad_norm": 3.3668384552001953, - "learning_rate": 2.8933503067593133e-05, - "loss": 0.7186, - "step": 142980 - }, - { - "epoch": 1.2640782192047242, - "grad_norm": 5.55092716217041, - "learning_rate": 2.8932029679921264e-05, - "loss": 0.6273, - "step": 142990 - }, - { - "epoch": 1.2641666224650365, - "grad_norm": 5.23398494720459, - "learning_rate": 2.8930556292249396e-05, - "loss": 0.6269, - "step": 143000 - }, - { - "epoch": 1.2642550257253489, - "grad_norm": 1.7478502988815308, - "learning_rate": 2.892908290457752e-05, - "loss": 0.597, - "step": 143010 - }, - { - "epoch": 1.264343428985661, - "grad_norm": 2.3077640533447266, - "learning_rate": 2.8927609516905653e-05, - "loss": 0.6253, - "step": 143020 - }, - { - "epoch": 1.264431832245973, - "grad_norm": 4.431246757507324, - "learning_rate": 2.8926136129233784e-05, - "loss": 0.7342, - "step": 143030 - }, - { - "epoch": 1.2645202355062855, - "grad_norm": 1.7367810010910034, - "learning_rate": 2.892466274156191e-05, - "loss": 0.5903, - "step": 143040 - }, - { - "epoch": 1.2646086387665978, - "grad_norm": 3.2478721141815186, - "learning_rate": 2.892318935389004e-05, - "loss": 0.5048, - "step": 143050 - }, - { - "epoch": 1.26469704202691, - "grad_norm": 1.2484029531478882, - "learning_rate": 2.8921715966218173e-05, - "loss": 0.4639, - "step": 143060 - }, - { - "epoch": 1.2647854452872223, - "grad_norm": 2.8576207160949707, - "learning_rate": 2.8920242578546298e-05, - "loss": 0.6445, - "step": 143070 - }, - { - "epoch": 1.2648738485475344, - "grad_norm": 3.724398136138916, - "learning_rate": 2.891876919087443e-05, - "loss": 0.6831, - "step": 143080 - }, - { - "epoch": 1.2649622518078467, - "grad_norm": 4.604367733001709, - "learning_rate": 2.8917295803202555e-05, - "loss": 0.5652, - "step": 143090 - }, - { - "epoch": 1.2650506550681588, - "grad_norm": 3.145444631576538, - "learning_rate": 2.8915822415530686e-05, - "loss": 0.6068, - "step": 143100 - }, - { - "epoch": 1.2651390583284712, - "grad_norm": 2.3527021408081055, - "learning_rate": 2.8914349027858818e-05, - "loss": 0.6915, - "step": 143110 - }, - { - "epoch": 1.2652274615887835, - "grad_norm": 1.904188871383667, - "learning_rate": 2.8912875640186943e-05, - "loss": 0.6783, - "step": 143120 - }, - { - "epoch": 1.2653158648490956, - "grad_norm": 2.834195852279663, - "learning_rate": 2.8911402252515075e-05, - "loss": 0.6958, - "step": 143130 - }, - { - "epoch": 1.2654042681094078, - "grad_norm": 5.731857776641846, - "learning_rate": 2.8909928864843207e-05, - "loss": 0.582, - "step": 143140 - }, - { - "epoch": 1.26549267136972, - "grad_norm": 1.5149227380752563, - "learning_rate": 2.890845547717133e-05, - "loss": 0.5661, - "step": 143150 - }, - { - "epoch": 1.2655810746300324, - "grad_norm": 13.879504203796387, - "learning_rate": 2.8906982089499463e-05, - "loss": 0.5487, - "step": 143160 - }, - { - "epoch": 1.2656694778903446, - "grad_norm": 4.728702068328857, - "learning_rate": 2.8905508701827595e-05, - "loss": 0.5851, - "step": 143170 - }, - { - "epoch": 1.265757881150657, - "grad_norm": 1.6866681575775146, - "learning_rate": 2.890403531415572e-05, - "loss": 0.6349, - "step": 143180 - }, - { - "epoch": 1.265846284410969, - "grad_norm": 1.0111569166183472, - "learning_rate": 2.8902561926483852e-05, - "loss": 0.5505, - "step": 143190 - }, - { - "epoch": 1.2659346876712814, - "grad_norm": 1.425072193145752, - "learning_rate": 2.8901088538811977e-05, - "loss": 0.5108, - "step": 143200 - }, - { - "epoch": 1.2660230909315935, - "grad_norm": 1.514492392539978, - "learning_rate": 2.889961515114011e-05, - "loss": 0.6134, - "step": 143210 - }, - { - "epoch": 1.2661114941919058, - "grad_norm": 1.8787119388580322, - "learning_rate": 2.889814176346824e-05, - "loss": 0.5376, - "step": 143220 - }, - { - "epoch": 1.2661998974522182, - "grad_norm": 13.510932922363281, - "learning_rate": 2.8896668375796365e-05, - "loss": 0.6555, - "step": 143230 - }, - { - "epoch": 1.2662883007125303, - "grad_norm": 8.407179832458496, - "learning_rate": 2.8895194988124497e-05, - "loss": 0.6163, - "step": 143240 - }, - { - "epoch": 1.2663767039728424, - "grad_norm": 3.4686079025268555, - "learning_rate": 2.889372160045263e-05, - "loss": 0.6605, - "step": 143250 - }, - { - "epoch": 1.2664651072331548, - "grad_norm": 1.2689135074615479, - "learning_rate": 2.8892248212780754e-05, - "loss": 0.4418, - "step": 143260 - }, - { - "epoch": 1.266553510493467, - "grad_norm": 3.256218671798706, - "learning_rate": 2.8890774825108885e-05, - "loss": 0.5736, - "step": 143270 - }, - { - "epoch": 1.2666419137537792, - "grad_norm": 2.8123676776885986, - "learning_rate": 2.8889301437437017e-05, - "loss": 0.5124, - "step": 143280 - }, - { - "epoch": 1.2667303170140916, - "grad_norm": 1.206463098526001, - "learning_rate": 2.8887828049765142e-05, - "loss": 0.5684, - "step": 143290 - }, - { - "epoch": 1.2668187202744037, - "grad_norm": 2.8159615993499756, - "learning_rate": 2.8886354662093274e-05, - "loss": 0.6916, - "step": 143300 - }, - { - "epoch": 1.266907123534716, - "grad_norm": 0.7424335479736328, - "learning_rate": 2.8884881274421405e-05, - "loss": 0.5167, - "step": 143310 - }, - { - "epoch": 1.2669955267950281, - "grad_norm": 0.9518781900405884, - "learning_rate": 2.888340788674953e-05, - "loss": 0.6491, - "step": 143320 - }, - { - "epoch": 1.2670839300553405, - "grad_norm": 2.728700637817383, - "learning_rate": 2.8881934499077662e-05, - "loss": 0.6594, - "step": 143330 - }, - { - "epoch": 1.2671723333156526, - "grad_norm": 4.281972408294678, - "learning_rate": 2.8880461111405787e-05, - "loss": 0.6846, - "step": 143340 - }, - { - "epoch": 1.267260736575965, - "grad_norm": 4.021638870239258, - "learning_rate": 2.887898772373392e-05, - "loss": 0.4888, - "step": 143350 - }, - { - "epoch": 1.267349139836277, - "grad_norm": 7.736510753631592, - "learning_rate": 2.887751433606205e-05, - "loss": 0.6918, - "step": 143360 - }, - { - "epoch": 1.2674375430965894, - "grad_norm": 0.790366530418396, - "learning_rate": 2.8876040948390176e-05, - "loss": 0.59, - "step": 143370 - }, - { - "epoch": 1.2675259463569017, - "grad_norm": 1.8299994468688965, - "learning_rate": 2.8874567560718307e-05, - "loss": 0.6455, - "step": 143380 - }, - { - "epoch": 1.2676143496172139, - "grad_norm": 3.1271791458129883, - "learning_rate": 2.887309417304644e-05, - "loss": 0.5926, - "step": 143390 - }, - { - "epoch": 1.267702752877526, - "grad_norm": 2.715224266052246, - "learning_rate": 2.8871620785374564e-05, - "loss": 0.5584, - "step": 143400 - }, - { - "epoch": 1.2677911561378383, - "grad_norm": 6.12232780456543, - "learning_rate": 2.8870147397702696e-05, - "loss": 0.6501, - "step": 143410 - }, - { - "epoch": 1.2678795593981507, - "grad_norm": 5.59682035446167, - "learning_rate": 2.8868674010030828e-05, - "loss": 0.7469, - "step": 143420 - }, - { - "epoch": 1.2679679626584628, - "grad_norm": 6.036071300506592, - "learning_rate": 2.8867200622358953e-05, - "loss": 0.7425, - "step": 143430 - }, - { - "epoch": 1.2680563659187751, - "grad_norm": 1.6482362747192383, - "learning_rate": 2.8865727234687084e-05, - "loss": 0.606, - "step": 143440 - }, - { - "epoch": 1.2681447691790873, - "grad_norm": 2.166813850402832, - "learning_rate": 2.886425384701521e-05, - "loss": 0.667, - "step": 143450 - }, - { - "epoch": 1.2682331724393996, - "grad_norm": 1.209266185760498, - "learning_rate": 2.886278045934334e-05, - "loss": 0.6682, - "step": 143460 - }, - { - "epoch": 1.2683215756997117, - "grad_norm": 11.603596687316895, - "learning_rate": 2.8861307071671473e-05, - "loss": 0.5872, - "step": 143470 - }, - { - "epoch": 1.268409978960024, - "grad_norm": 1.7886656522750854, - "learning_rate": 2.8859833683999598e-05, - "loss": 0.5745, - "step": 143480 - }, - { - "epoch": 1.2684983822203364, - "grad_norm": 3.00777268409729, - "learning_rate": 2.885836029632773e-05, - "loss": 0.6183, - "step": 143490 - }, - { - "epoch": 1.2685867854806485, - "grad_norm": 1.610642433166504, - "learning_rate": 2.885688690865586e-05, - "loss": 0.6505, - "step": 143500 - }, - { - "epoch": 1.2686751887409606, - "grad_norm": 1.5638673305511475, - "learning_rate": 2.8855413520983986e-05, - "loss": 0.525, - "step": 143510 - }, - { - "epoch": 1.268763592001273, - "grad_norm": 3.448699712753296, - "learning_rate": 2.8853940133312118e-05, - "loss": 0.6483, - "step": 143520 - }, - { - "epoch": 1.2688519952615853, - "grad_norm": 13.250480651855469, - "learning_rate": 2.885246674564025e-05, - "loss": 0.4388, - "step": 143530 - }, - { - "epoch": 1.2689403985218974, - "grad_norm": 2.753995656967163, - "learning_rate": 2.8850993357968375e-05, - "loss": 0.6268, - "step": 143540 - }, - { - "epoch": 1.2690288017822098, - "grad_norm": 2.7080774307250977, - "learning_rate": 2.8849519970296506e-05, - "loss": 0.5594, - "step": 143550 - }, - { - "epoch": 1.269117205042522, - "grad_norm": 3.518550157546997, - "learning_rate": 2.884804658262463e-05, - "loss": 0.5845, - "step": 143560 - }, - { - "epoch": 1.2692056083028342, - "grad_norm": 1.6237034797668457, - "learning_rate": 2.8846573194952763e-05, - "loss": 0.5847, - "step": 143570 - }, - { - "epoch": 1.2692940115631464, - "grad_norm": 6.082953929901123, - "learning_rate": 2.8845099807280895e-05, - "loss": 0.722, - "step": 143580 - }, - { - "epoch": 1.2693824148234587, - "grad_norm": 3.562917947769165, - "learning_rate": 2.884362641960902e-05, - "loss": 0.6102, - "step": 143590 - }, - { - "epoch": 1.269470818083771, - "grad_norm": 1.9264031648635864, - "learning_rate": 2.884215303193715e-05, - "loss": 0.4509, - "step": 143600 - }, - { - "epoch": 1.2695592213440832, - "grad_norm": 1.300840139389038, - "learning_rate": 2.8840679644265283e-05, - "loss": 0.5835, - "step": 143610 - }, - { - "epoch": 1.2696476246043953, - "grad_norm": 1.7975136041641235, - "learning_rate": 2.8839206256593408e-05, - "loss": 0.7292, - "step": 143620 - }, - { - "epoch": 1.2697360278647076, - "grad_norm": 5.618459224700928, - "learning_rate": 2.883773286892154e-05, - "loss": 0.4774, - "step": 143630 - }, - { - "epoch": 1.26982443112502, - "grad_norm": 0.8197553753852844, - "learning_rate": 2.883625948124967e-05, - "loss": 0.6866, - "step": 143640 - }, - { - "epoch": 1.269912834385332, - "grad_norm": 6.46420955657959, - "learning_rate": 2.8834786093577797e-05, - "loss": 0.6666, - "step": 143650 - }, - { - "epoch": 1.2700012376456444, - "grad_norm": 7.9257612228393555, - "learning_rate": 2.883331270590593e-05, - "loss": 0.6584, - "step": 143660 - }, - { - "epoch": 1.2700896409059566, - "grad_norm": 3.0767781734466553, - "learning_rate": 2.8831839318234057e-05, - "loss": 0.6179, - "step": 143670 - }, - { - "epoch": 1.270178044166269, - "grad_norm": 1.7076400518417358, - "learning_rate": 2.8830365930562185e-05, - "loss": 0.5594, - "step": 143680 - }, - { - "epoch": 1.270266447426581, - "grad_norm": 3.3699827194213867, - "learning_rate": 2.8828892542890317e-05, - "loss": 0.5119, - "step": 143690 - }, - { - "epoch": 1.2703548506868934, - "grad_norm": 2.9758145809173584, - "learning_rate": 2.8827419155218445e-05, - "loss": 0.6506, - "step": 143700 - }, - { - "epoch": 1.2704432539472057, - "grad_norm": 2.0639407634735107, - "learning_rate": 2.8825945767546574e-05, - "loss": 0.6716, - "step": 143710 - }, - { - "epoch": 1.2705316572075178, - "grad_norm": 2.3735551834106445, - "learning_rate": 2.8824472379874705e-05, - "loss": 0.6813, - "step": 143720 - }, - { - "epoch": 1.27062006046783, - "grad_norm": 1.4210724830627441, - "learning_rate": 2.8822998992202834e-05, - "loss": 0.5981, - "step": 143730 - }, - { - "epoch": 1.2707084637281423, - "grad_norm": 1.2074196338653564, - "learning_rate": 2.8821525604530962e-05, - "loss": 0.5499, - "step": 143740 - }, - { - "epoch": 1.2707968669884546, - "grad_norm": 1.5849781036376953, - "learning_rate": 2.8820052216859094e-05, - "loss": 0.6157, - "step": 143750 - }, - { - "epoch": 1.2708852702487667, - "grad_norm": 7.992796897888184, - "learning_rate": 2.8818578829187222e-05, - "loss": 0.591, - "step": 143760 - }, - { - "epoch": 1.270973673509079, - "grad_norm": 5.470476150512695, - "learning_rate": 2.881710544151535e-05, - "loss": 0.5298, - "step": 143770 - }, - { - "epoch": 1.2710620767693912, - "grad_norm": 6.0549116134643555, - "learning_rate": 2.8815632053843482e-05, - "loss": 0.5797, - "step": 143780 - }, - { - "epoch": 1.2711504800297035, - "grad_norm": 1.3632700443267822, - "learning_rate": 2.881415866617161e-05, - "loss": 0.6716, - "step": 143790 - }, - { - "epoch": 1.2712388832900157, - "grad_norm": 2.3481719493865967, - "learning_rate": 2.881268527849974e-05, - "loss": 0.5658, - "step": 143800 - }, - { - "epoch": 1.271327286550328, - "grad_norm": 2.9604926109313965, - "learning_rate": 2.8811211890827867e-05, - "loss": 0.5784, - "step": 143810 - }, - { - "epoch": 1.2714156898106403, - "grad_norm": 8.447488784790039, - "learning_rate": 2.8809738503156e-05, - "loss": 0.5668, - "step": 143820 - }, - { - "epoch": 1.2715040930709525, - "grad_norm": 20.94457244873047, - "learning_rate": 2.8808265115484127e-05, - "loss": 0.5542, - "step": 143830 - }, - { - "epoch": 1.2715924963312646, - "grad_norm": 2.674136161804199, - "learning_rate": 2.8806791727812256e-05, - "loss": 0.6087, - "step": 143840 - }, - { - "epoch": 1.271680899591577, - "grad_norm": 4.967868328094482, - "learning_rate": 2.8805318340140387e-05, - "loss": 0.5514, - "step": 143850 - }, - { - "epoch": 1.2717693028518893, - "grad_norm": 5.074409484863281, - "learning_rate": 2.8803844952468516e-05, - "loss": 0.713, - "step": 143860 - }, - { - "epoch": 1.2718577061122014, - "grad_norm": 2.294990301132202, - "learning_rate": 2.8802371564796644e-05, - "loss": 0.6364, - "step": 143870 - }, - { - "epoch": 1.2719461093725137, - "grad_norm": 9.884675025939941, - "learning_rate": 2.8800898177124776e-05, - "loss": 0.5875, - "step": 143880 - }, - { - "epoch": 1.2720345126328259, - "grad_norm": 6.053366184234619, - "learning_rate": 2.8799424789452904e-05, - "loss": 0.6447, - "step": 143890 - }, - { - "epoch": 1.2721229158931382, - "grad_norm": 8.812887191772461, - "learning_rate": 2.8797951401781033e-05, - "loss": 0.7148, - "step": 143900 - }, - { - "epoch": 1.2722113191534503, - "grad_norm": 1.4976969957351685, - "learning_rate": 2.8796478014109164e-05, - "loss": 0.5782, - "step": 143910 - }, - { - "epoch": 1.2722997224137627, - "grad_norm": 5.260051727294922, - "learning_rate": 2.879500462643729e-05, - "loss": 0.646, - "step": 143920 - }, - { - "epoch": 1.2723881256740748, - "grad_norm": 8.015777587890625, - "learning_rate": 2.879353123876542e-05, - "loss": 0.677, - "step": 143930 - }, - { - "epoch": 1.2724765289343871, - "grad_norm": 11.101980209350586, - "learning_rate": 2.8792057851093553e-05, - "loss": 0.6675, - "step": 143940 - }, - { - "epoch": 1.2725649321946992, - "grad_norm": 5.310331344604492, - "learning_rate": 2.8790584463421678e-05, - "loss": 0.6534, - "step": 143950 - }, - { - "epoch": 1.2726533354550116, - "grad_norm": 5.071225166320801, - "learning_rate": 2.878911107574981e-05, - "loss": 0.5221, - "step": 143960 - }, - { - "epoch": 1.272741738715324, - "grad_norm": 4.7692155838012695, - "learning_rate": 2.878763768807794e-05, - "loss": 0.5558, - "step": 143970 - }, - { - "epoch": 1.272830141975636, - "grad_norm": 2.1279332637786865, - "learning_rate": 2.8786164300406066e-05, - "loss": 0.7547, - "step": 143980 - }, - { - "epoch": 1.2729185452359484, - "grad_norm": 3.2309956550598145, - "learning_rate": 2.8784690912734198e-05, - "loss": 0.6821, - "step": 143990 - }, - { - "epoch": 1.2730069484962605, - "grad_norm": 2.9643259048461914, - "learning_rate": 2.878321752506233e-05, - "loss": 0.6318, - "step": 144000 - }, - { - "epoch": 1.2730953517565728, - "grad_norm": 3.976378917694092, - "learning_rate": 2.8781744137390455e-05, - "loss": 0.6519, - "step": 144010 - }, - { - "epoch": 1.273183755016885, - "grad_norm": 1.9537153244018555, - "learning_rate": 2.8780270749718586e-05, - "loss": 0.6273, - "step": 144020 - }, - { - "epoch": 1.2732721582771973, - "grad_norm": 3.5452706813812256, - "learning_rate": 2.877879736204671e-05, - "loss": 0.56, - "step": 144030 - }, - { - "epoch": 1.2733605615375094, - "grad_norm": 1.5345059633255005, - "learning_rate": 2.8777323974374843e-05, - "loss": 0.5701, - "step": 144040 - }, - { - "epoch": 1.2734489647978218, - "grad_norm": 4.666220188140869, - "learning_rate": 2.8775850586702975e-05, - "loss": 0.5635, - "step": 144050 - }, - { - "epoch": 1.273537368058134, - "grad_norm": 2.9292423725128174, - "learning_rate": 2.87743771990311e-05, - "loss": 0.5471, - "step": 144060 - }, - { - "epoch": 1.2736257713184462, - "grad_norm": 1.0052542686462402, - "learning_rate": 2.877290381135923e-05, - "loss": 0.559, - "step": 144070 - }, - { - "epoch": 1.2737141745787586, - "grad_norm": 1.4468069076538086, - "learning_rate": 2.8771430423687363e-05, - "loss": 0.6737, - "step": 144080 - }, - { - "epoch": 1.2738025778390707, - "grad_norm": 1.5976800918579102, - "learning_rate": 2.8769957036015488e-05, - "loss": 0.6002, - "step": 144090 - }, - { - "epoch": 1.2738909810993828, - "grad_norm": 3.1899566650390625, - "learning_rate": 2.876848364834362e-05, - "loss": 0.6077, - "step": 144100 - }, - { - "epoch": 1.2739793843596952, - "grad_norm": 5.289868354797363, - "learning_rate": 2.8767010260671752e-05, - "loss": 0.5639, - "step": 144110 - }, - { - "epoch": 1.2740677876200075, - "grad_norm": 0.67649906873703, - "learning_rate": 2.8765536872999877e-05, - "loss": 0.5683, - "step": 144120 - }, - { - "epoch": 1.2741561908803196, - "grad_norm": 1.6645371913909912, - "learning_rate": 2.876406348532801e-05, - "loss": 0.6473, - "step": 144130 - }, - { - "epoch": 1.274244594140632, - "grad_norm": 1.9654217958450317, - "learning_rate": 2.8762590097656133e-05, - "loss": 0.7002, - "step": 144140 - }, - { - "epoch": 1.274332997400944, - "grad_norm": 7.0221662521362305, - "learning_rate": 2.8761116709984265e-05, - "loss": 0.5752, - "step": 144150 - }, - { - "epoch": 1.2744214006612564, - "grad_norm": 1.2534343004226685, - "learning_rate": 2.8759643322312397e-05, - "loss": 0.6451, - "step": 144160 - }, - { - "epoch": 1.2745098039215685, - "grad_norm": 2.1572318077087402, - "learning_rate": 2.8758169934640522e-05, - "loss": 0.5988, - "step": 144170 - }, - { - "epoch": 1.2745982071818809, - "grad_norm": 1.246850609779358, - "learning_rate": 2.8756696546968654e-05, - "loss": 0.6117, - "step": 144180 - }, - { - "epoch": 1.2746866104421932, - "grad_norm": 1.5250139236450195, - "learning_rate": 2.8755223159296785e-05, - "loss": 0.6395, - "step": 144190 - }, - { - "epoch": 1.2747750137025053, - "grad_norm": 10.86414623260498, - "learning_rate": 2.875374977162491e-05, - "loss": 0.6626, - "step": 144200 - }, - { - "epoch": 1.2748634169628175, - "grad_norm": 16.819005966186523, - "learning_rate": 2.8752276383953042e-05, - "loss": 0.6078, - "step": 144210 - }, - { - "epoch": 1.2749518202231298, - "grad_norm": 6.163760185241699, - "learning_rate": 2.8750802996281174e-05, - "loss": 0.5332, - "step": 144220 - }, - { - "epoch": 1.2750402234834421, - "grad_norm": 5.6115031242370605, - "learning_rate": 2.87493296086093e-05, - "loss": 0.7229, - "step": 144230 - }, - { - "epoch": 1.2751286267437543, - "grad_norm": 1.7008734941482544, - "learning_rate": 2.874785622093743e-05, - "loss": 0.6086, - "step": 144240 - }, - { - "epoch": 1.2752170300040666, - "grad_norm": 4.339953899383545, - "learning_rate": 2.8746382833265562e-05, - "loss": 0.7021, - "step": 144250 - }, - { - "epoch": 1.2753054332643787, - "grad_norm": 1.4352452754974365, - "learning_rate": 2.8744909445593687e-05, - "loss": 0.634, - "step": 144260 - }, - { - "epoch": 1.275393836524691, - "grad_norm": 3.6246347427368164, - "learning_rate": 2.874343605792182e-05, - "loss": 0.5599, - "step": 144270 - }, - { - "epoch": 1.2754822397850032, - "grad_norm": 2.590325355529785, - "learning_rate": 2.8741962670249944e-05, - "loss": 0.5862, - "step": 144280 - }, - { - "epoch": 1.2755706430453155, - "grad_norm": 6.288815498352051, - "learning_rate": 2.8740489282578076e-05, - "loss": 0.5432, - "step": 144290 - }, - { - "epoch": 1.2756590463056279, - "grad_norm": 3.886507749557495, - "learning_rate": 2.8739015894906207e-05, - "loss": 0.5607, - "step": 144300 - }, - { - "epoch": 1.27574744956594, - "grad_norm": 2.6747195720672607, - "learning_rate": 2.8737542507234332e-05, - "loss": 0.5247, - "step": 144310 - }, - { - "epoch": 1.2758358528262521, - "grad_norm": 1.249826192855835, - "learning_rate": 2.8736069119562464e-05, - "loss": 0.676, - "step": 144320 - }, - { - "epoch": 1.2759242560865645, - "grad_norm": 0.8176407217979431, - "learning_rate": 2.8734595731890596e-05, - "loss": 0.621, - "step": 144330 - }, - { - "epoch": 1.2760126593468768, - "grad_norm": 1.9243634939193726, - "learning_rate": 2.873312234421872e-05, - "loss": 0.6846, - "step": 144340 - }, - { - "epoch": 1.276101062607189, - "grad_norm": 2.7360658645629883, - "learning_rate": 2.8731648956546853e-05, - "loss": 0.5991, - "step": 144350 - }, - { - "epoch": 1.2761894658675013, - "grad_norm": 3.5600759983062744, - "learning_rate": 2.8730175568874984e-05, - "loss": 0.6171, - "step": 144360 - }, - { - "epoch": 1.2762778691278134, - "grad_norm": 3.715871572494507, - "learning_rate": 2.872870218120311e-05, - "loss": 0.668, - "step": 144370 - }, - { - "epoch": 1.2763662723881257, - "grad_norm": 15.773343086242676, - "learning_rate": 2.872722879353124e-05, - "loss": 0.6257, - "step": 144380 - }, - { - "epoch": 1.2764546756484378, - "grad_norm": 0.5877245664596558, - "learning_rate": 2.8725755405859366e-05, - "loss": 0.5009, - "step": 144390 - }, - { - "epoch": 1.2765430789087502, - "grad_norm": 22.888269424438477, - "learning_rate": 2.8724282018187498e-05, - "loss": 0.6638, - "step": 144400 - }, - { - "epoch": 1.2766314821690625, - "grad_norm": 2.3428499698638916, - "learning_rate": 2.872280863051563e-05, - "loss": 0.6117, - "step": 144410 - }, - { - "epoch": 1.2767198854293746, - "grad_norm": 3.7248363494873047, - "learning_rate": 2.8721335242843754e-05, - "loss": 0.6009, - "step": 144420 - }, - { - "epoch": 1.2768082886896868, - "grad_norm": 2.8315975666046143, - "learning_rate": 2.8719861855171886e-05, - "loss": 0.4665, - "step": 144430 - }, - { - "epoch": 1.276896691949999, - "grad_norm": 8.859705924987793, - "learning_rate": 2.8718388467500018e-05, - "loss": 0.6391, - "step": 144440 - }, - { - "epoch": 1.2769850952103114, - "grad_norm": 2.327495574951172, - "learning_rate": 2.8716915079828143e-05, - "loss": 0.601, - "step": 144450 - }, - { - "epoch": 1.2770734984706236, - "grad_norm": 1.9837312698364258, - "learning_rate": 2.8715441692156275e-05, - "loss": 0.7083, - "step": 144460 - }, - { - "epoch": 1.277161901730936, - "grad_norm": 1.566441297531128, - "learning_rate": 2.8713968304484406e-05, - "loss": 0.5664, - "step": 144470 - }, - { - "epoch": 1.277250304991248, - "grad_norm": 3.0444397926330566, - "learning_rate": 2.871249491681253e-05, - "loss": 0.5965, - "step": 144480 - }, - { - "epoch": 1.2773387082515604, - "grad_norm": 1.6361877918243408, - "learning_rate": 2.8711021529140663e-05, - "loss": 0.5821, - "step": 144490 - }, - { - "epoch": 1.2774271115118725, - "grad_norm": 1.4208300113677979, - "learning_rate": 2.8709548141468788e-05, - "loss": 0.5275, - "step": 144500 - }, - { - "epoch": 1.2775155147721848, - "grad_norm": 3.640321969985962, - "learning_rate": 2.870807475379692e-05, - "loss": 0.565, - "step": 144510 - }, - { - "epoch": 1.2776039180324972, - "grad_norm": 5.088964939117432, - "learning_rate": 2.870660136612505e-05, - "loss": 0.6934, - "step": 144520 - }, - { - "epoch": 1.2776923212928093, - "grad_norm": 2.4783012866973877, - "learning_rate": 2.8705127978453176e-05, - "loss": 0.5838, - "step": 144530 - }, - { - "epoch": 1.2777807245531214, - "grad_norm": 2.5613768100738525, - "learning_rate": 2.8703654590781308e-05, - "loss": 0.6613, - "step": 144540 - }, - { - "epoch": 1.2778691278134338, - "grad_norm": 1.3631033897399902, - "learning_rate": 2.870218120310944e-05, - "loss": 0.8001, - "step": 144550 - }, - { - "epoch": 1.277957531073746, - "grad_norm": 7.077154159545898, - "learning_rate": 2.8700707815437565e-05, - "loss": 0.6563, - "step": 144560 - }, - { - "epoch": 1.2780459343340582, - "grad_norm": 1.1418441534042358, - "learning_rate": 2.8699234427765697e-05, - "loss": 0.6233, - "step": 144570 - }, - { - "epoch": 1.2781343375943706, - "grad_norm": 7.944869041442871, - "learning_rate": 2.869776104009383e-05, - "loss": 0.4808, - "step": 144580 - }, - { - "epoch": 1.2782227408546827, - "grad_norm": 8.782295227050781, - "learning_rate": 2.8696287652421953e-05, - "loss": 0.6318, - "step": 144590 - }, - { - "epoch": 1.278311144114995, - "grad_norm": 3.41955304145813, - "learning_rate": 2.8694814264750085e-05, - "loss": 0.6655, - "step": 144600 - }, - { - "epoch": 1.2783995473753071, - "grad_norm": 2.343939781188965, - "learning_rate": 2.8693340877078213e-05, - "loss": 0.7034, - "step": 144610 - }, - { - "epoch": 1.2784879506356195, - "grad_norm": 0.9646592736244202, - "learning_rate": 2.8691867489406342e-05, - "loss": 0.706, - "step": 144620 - }, - { - "epoch": 1.2785763538959316, - "grad_norm": 2.6781017780303955, - "learning_rate": 2.8690394101734474e-05, - "loss": 0.5193, - "step": 144630 - }, - { - "epoch": 1.278664757156244, - "grad_norm": 4.066171169281006, - "learning_rate": 2.8688920714062602e-05, - "loss": 0.6338, - "step": 144640 - }, - { - "epoch": 1.278753160416556, - "grad_norm": 10.386735916137695, - "learning_rate": 2.868744732639073e-05, - "loss": 0.6423, - "step": 144650 - }, - { - "epoch": 1.2788415636768684, - "grad_norm": 0.9358965158462524, - "learning_rate": 2.8685973938718862e-05, - "loss": 0.5591, - "step": 144660 - }, - { - "epoch": 1.2789299669371808, - "grad_norm": 4.788538932800293, - "learning_rate": 2.868450055104699e-05, - "loss": 0.5652, - "step": 144670 - }, - { - "epoch": 1.2790183701974929, - "grad_norm": 1.6475518941879272, - "learning_rate": 2.868302716337512e-05, - "loss": 0.5754, - "step": 144680 - }, - { - "epoch": 1.279106773457805, - "grad_norm": 2.2971761226654053, - "learning_rate": 2.868155377570325e-05, - "loss": 0.6217, - "step": 144690 - }, - { - "epoch": 1.2791951767181173, - "grad_norm": 12.338384628295898, - "learning_rate": 2.868008038803138e-05, - "loss": 0.5459, - "step": 144700 - }, - { - "epoch": 1.2792835799784297, - "grad_norm": 14.136381149291992, - "learning_rate": 2.8678607000359507e-05, - "loss": 0.6504, - "step": 144710 - }, - { - "epoch": 1.2793719832387418, - "grad_norm": 1.352890968322754, - "learning_rate": 2.867713361268764e-05, - "loss": 0.6775, - "step": 144720 - }, - { - "epoch": 1.2794603864990541, - "grad_norm": 1.5157339572906494, - "learning_rate": 2.8675660225015767e-05, - "loss": 0.5784, - "step": 144730 - }, - { - "epoch": 1.2795487897593663, - "grad_norm": 2.1194818019866943, - "learning_rate": 2.8674186837343896e-05, - "loss": 0.5803, - "step": 144740 - }, - { - "epoch": 1.2796371930196786, - "grad_norm": 4.2607855796813965, - "learning_rate": 2.8672713449672024e-05, - "loss": 0.5947, - "step": 144750 - }, - { - "epoch": 1.2797255962799907, - "grad_norm": 6.949308395385742, - "learning_rate": 2.8671240062000156e-05, - "loss": 0.6539, - "step": 144760 - }, - { - "epoch": 1.279813999540303, - "grad_norm": 1.5105788707733154, - "learning_rate": 2.8669766674328284e-05, - "loss": 0.5601, - "step": 144770 - }, - { - "epoch": 1.2799024028006154, - "grad_norm": 1.5485703945159912, - "learning_rate": 2.8668293286656412e-05, - "loss": 0.6481, - "step": 144780 - }, - { - "epoch": 1.2799908060609275, - "grad_norm": 2.6597132682800293, - "learning_rate": 2.8666819898984544e-05, - "loss": 0.599, - "step": 144790 - }, - { - "epoch": 1.2800792093212396, - "grad_norm": 3.5043883323669434, - "learning_rate": 2.8665346511312673e-05, - "loss": 0.656, - "step": 144800 - }, - { - "epoch": 1.280167612581552, - "grad_norm": 0.8487949967384338, - "learning_rate": 2.86638731236408e-05, - "loss": 0.5207, - "step": 144810 - }, - { - "epoch": 1.2802560158418643, - "grad_norm": 2.381865978240967, - "learning_rate": 2.8662399735968933e-05, - "loss": 0.6365, - "step": 144820 - }, - { - "epoch": 1.2803444191021764, - "grad_norm": 4.491841793060303, - "learning_rate": 2.866092634829706e-05, - "loss": 0.6697, - "step": 144830 - }, - { - "epoch": 1.2804328223624888, - "grad_norm": 6.116016387939453, - "learning_rate": 2.865945296062519e-05, - "loss": 0.6145, - "step": 144840 - }, - { - "epoch": 1.280521225622801, - "grad_norm": 3.354546546936035, - "learning_rate": 2.865797957295332e-05, - "loss": 0.5288, - "step": 144850 - }, - { - "epoch": 1.2806096288831132, - "grad_norm": 4.211579322814941, - "learning_rate": 2.8656506185281446e-05, - "loss": 0.7173, - "step": 144860 - }, - { - "epoch": 1.2806980321434254, - "grad_norm": 8.3267183303833, - "learning_rate": 2.8655032797609578e-05, - "loss": 0.6671, - "step": 144870 - }, - { - "epoch": 1.2807864354037377, - "grad_norm": 11.029805183410645, - "learning_rate": 2.865355940993771e-05, - "loss": 0.6084, - "step": 144880 - }, - { - "epoch": 1.28087483866405, - "grad_norm": 3.079167366027832, - "learning_rate": 2.8652086022265834e-05, - "loss": 0.6042, - "step": 144890 - }, - { - "epoch": 1.2809632419243622, - "grad_norm": 1.026294231414795, - "learning_rate": 2.8650612634593966e-05, - "loss": 0.5229, - "step": 144900 - }, - { - "epoch": 1.2810516451846743, - "grad_norm": 2.931396245956421, - "learning_rate": 2.8649139246922098e-05, - "loss": 0.6454, - "step": 144910 - }, - { - "epoch": 1.2811400484449866, - "grad_norm": 3.786447048187256, - "learning_rate": 2.8647665859250223e-05, - "loss": 0.599, - "step": 144920 - }, - { - "epoch": 1.281228451705299, - "grad_norm": 2.0971574783325195, - "learning_rate": 2.8646192471578355e-05, - "loss": 0.6693, - "step": 144930 - }, - { - "epoch": 1.281316854965611, - "grad_norm": 4.028332710266113, - "learning_rate": 2.8644719083906486e-05, - "loss": 0.734, - "step": 144940 - }, - { - "epoch": 1.2814052582259234, - "grad_norm": 2.2117221355438232, - "learning_rate": 2.864324569623461e-05, - "loss": 0.5475, - "step": 144950 - }, - { - "epoch": 1.2814936614862356, - "grad_norm": 2.2046737670898438, - "learning_rate": 2.8641772308562743e-05, - "loss": 0.5736, - "step": 144960 - }, - { - "epoch": 1.281582064746548, - "grad_norm": 6.036850452423096, - "learning_rate": 2.8640298920890868e-05, - "loss": 0.6335, - "step": 144970 - }, - { - "epoch": 1.28167046800686, - "grad_norm": 2.936176061630249, - "learning_rate": 2.8638825533219e-05, - "loss": 0.6286, - "step": 144980 - }, - { - "epoch": 1.2817588712671724, - "grad_norm": 3.474818229675293, - "learning_rate": 2.863735214554713e-05, - "loss": 0.5419, - "step": 144990 - }, - { - "epoch": 1.2818472745274847, - "grad_norm": 13.819268226623535, - "learning_rate": 2.8635878757875257e-05, - "loss": 0.7727, - "step": 145000 - }, - { - "epoch": 1.2819356777877968, - "grad_norm": 9.787267684936523, - "learning_rate": 2.8634405370203388e-05, - "loss": 0.5939, - "step": 145010 - }, - { - "epoch": 1.282024081048109, - "grad_norm": 8.66058349609375, - "learning_rate": 2.863293198253152e-05, - "loss": 0.835, - "step": 145020 - }, - { - "epoch": 1.2821124843084213, - "grad_norm": 1.4130849838256836, - "learning_rate": 2.8631458594859645e-05, - "loss": 0.568, - "step": 145030 - }, - { - "epoch": 1.2822008875687336, - "grad_norm": 1.268191933631897, - "learning_rate": 2.8629985207187777e-05, - "loss": 0.4368, - "step": 145040 - }, - { - "epoch": 1.2822892908290457, - "grad_norm": 3.038203239440918, - "learning_rate": 2.862851181951591e-05, - "loss": 0.5463, - "step": 145050 - }, - { - "epoch": 1.282377694089358, - "grad_norm": 5.102221488952637, - "learning_rate": 2.8627038431844033e-05, - "loss": 0.7606, - "step": 145060 - }, - { - "epoch": 1.2824660973496702, - "grad_norm": 2.93377947807312, - "learning_rate": 2.8625565044172165e-05, - "loss": 0.7012, - "step": 145070 - }, - { - "epoch": 1.2825545006099826, - "grad_norm": 3.0152440071105957, - "learning_rate": 2.862409165650029e-05, - "loss": 0.6868, - "step": 145080 - }, - { - "epoch": 1.2826429038702947, - "grad_norm": 0.9875383973121643, - "learning_rate": 2.8622618268828422e-05, - "loss": 0.4813, - "step": 145090 - }, - { - "epoch": 1.282731307130607, - "grad_norm": 2.975111484527588, - "learning_rate": 2.8621144881156554e-05, - "loss": 0.7272, - "step": 145100 - }, - { - "epoch": 1.2828197103909194, - "grad_norm": 2.956238269805908, - "learning_rate": 2.861967149348468e-05, - "loss": 0.6224, - "step": 145110 - }, - { - "epoch": 1.2829081136512315, - "grad_norm": 6.5094895362854, - "learning_rate": 2.861819810581281e-05, - "loss": 0.6055, - "step": 145120 - }, - { - "epoch": 1.2829965169115436, - "grad_norm": 5.5012526512146, - "learning_rate": 2.8616724718140942e-05, - "loss": 0.5386, - "step": 145130 - }, - { - "epoch": 1.283084920171856, - "grad_norm": 3.321120023727417, - "learning_rate": 2.8615251330469067e-05, - "loss": 0.4525, - "step": 145140 - }, - { - "epoch": 1.2831733234321683, - "grad_norm": 2.4823215007781982, - "learning_rate": 2.86137779427972e-05, - "loss": 0.6326, - "step": 145150 - }, - { - "epoch": 1.2832617266924804, - "grad_norm": 9.892343521118164, - "learning_rate": 2.861230455512533e-05, - "loss": 0.5858, - "step": 145160 - }, - { - "epoch": 1.2833501299527927, - "grad_norm": 3.45646071434021, - "learning_rate": 2.8610831167453456e-05, - "loss": 0.5829, - "step": 145170 - }, - { - "epoch": 1.2834385332131049, - "grad_norm": 1.6325308084487915, - "learning_rate": 2.8609357779781587e-05, - "loss": 0.573, - "step": 145180 - }, - { - "epoch": 1.2835269364734172, - "grad_norm": 3.046881675720215, - "learning_rate": 2.860788439210972e-05, - "loss": 0.7265, - "step": 145190 - }, - { - "epoch": 1.2836153397337293, - "grad_norm": 6.056140422821045, - "learning_rate": 2.8606411004437844e-05, - "loss": 0.5527, - "step": 145200 - }, - { - "epoch": 1.2837037429940417, - "grad_norm": 3.9403016567230225, - "learning_rate": 2.8604937616765976e-05, - "loss": 0.5361, - "step": 145210 - }, - { - "epoch": 1.2837921462543538, - "grad_norm": 2.8443801403045654, - "learning_rate": 2.86034642290941e-05, - "loss": 0.5384, - "step": 145220 - }, - { - "epoch": 1.2838805495146661, - "grad_norm": 1.457789659500122, - "learning_rate": 2.8601990841422232e-05, - "loss": 0.5964, - "step": 145230 - }, - { - "epoch": 1.2839689527749782, - "grad_norm": 2.6842362880706787, - "learning_rate": 2.8600517453750364e-05, - "loss": 0.6268, - "step": 145240 - }, - { - "epoch": 1.2840573560352906, - "grad_norm": 2.956540584564209, - "learning_rate": 2.859904406607849e-05, - "loss": 0.6093, - "step": 145250 - }, - { - "epoch": 1.284145759295603, - "grad_norm": 1.686755657196045, - "learning_rate": 2.859757067840662e-05, - "loss": 0.5026, - "step": 145260 - }, - { - "epoch": 1.284234162555915, - "grad_norm": 4.004009246826172, - "learning_rate": 2.8596097290734753e-05, - "loss": 0.5831, - "step": 145270 - }, - { - "epoch": 1.2843225658162272, - "grad_norm": 1.9915940761566162, - "learning_rate": 2.8594623903062878e-05, - "loss": 0.5448, - "step": 145280 - }, - { - "epoch": 1.2844109690765395, - "grad_norm": 3.8886215686798096, - "learning_rate": 2.859315051539101e-05, - "loss": 0.6719, - "step": 145290 - }, - { - "epoch": 1.2844993723368519, - "grad_norm": 2.0705909729003906, - "learning_rate": 2.859167712771914e-05, - "loss": 0.5929, - "step": 145300 - }, - { - "epoch": 1.284587775597164, - "grad_norm": 3.3223421573638916, - "learning_rate": 2.8590203740047266e-05, - "loss": 0.5063, - "step": 145310 - }, - { - "epoch": 1.2846761788574763, - "grad_norm": 9.860546112060547, - "learning_rate": 2.8588730352375398e-05, - "loss": 0.5811, - "step": 145320 - }, - { - "epoch": 1.2847645821177884, - "grad_norm": 0.9417491555213928, - "learning_rate": 2.8587256964703523e-05, - "loss": 0.6186, - "step": 145330 - }, - { - "epoch": 1.2848529853781008, - "grad_norm": 3.1897246837615967, - "learning_rate": 2.8585783577031654e-05, - "loss": 0.6089, - "step": 145340 - }, - { - "epoch": 1.284941388638413, - "grad_norm": 1.8526476621627808, - "learning_rate": 2.8584310189359786e-05, - "loss": 0.6641, - "step": 145350 - }, - { - "epoch": 1.2850297918987252, - "grad_norm": 1.4419277906417847, - "learning_rate": 2.858283680168791e-05, - "loss": 0.6775, - "step": 145360 - }, - { - "epoch": 1.2851181951590376, - "grad_norm": 10.389781951904297, - "learning_rate": 2.8581363414016043e-05, - "loss": 0.5123, - "step": 145370 - }, - { - "epoch": 1.2852065984193497, - "grad_norm": 3.1473987102508545, - "learning_rate": 2.8579890026344175e-05, - "loss": 0.5282, - "step": 145380 - }, - { - "epoch": 1.2852950016796618, - "grad_norm": 1.3255176544189453, - "learning_rate": 2.85784166386723e-05, - "loss": 0.6065, - "step": 145390 - }, - { - "epoch": 1.2853834049399742, - "grad_norm": 6.769455909729004, - "learning_rate": 2.857694325100043e-05, - "loss": 0.6503, - "step": 145400 - }, - { - "epoch": 1.2854718082002865, - "grad_norm": 4.0513505935668945, - "learning_rate": 2.8575469863328563e-05, - "loss": 0.511, - "step": 145410 - }, - { - "epoch": 1.2855602114605986, - "grad_norm": 5.690187454223633, - "learning_rate": 2.8573996475656688e-05, - "loss": 0.6757, - "step": 145420 - }, - { - "epoch": 1.285648614720911, - "grad_norm": 0.7033282518386841, - "learning_rate": 2.857252308798482e-05, - "loss": 0.6104, - "step": 145430 - }, - { - "epoch": 1.285737017981223, - "grad_norm": 3.491544008255005, - "learning_rate": 2.8571049700312945e-05, - "loss": 0.5733, - "step": 145440 - }, - { - "epoch": 1.2858254212415354, - "grad_norm": 1.6139774322509766, - "learning_rate": 2.8569576312641077e-05, - "loss": 0.5101, - "step": 145450 - }, - { - "epoch": 1.2859138245018475, - "grad_norm": 7.899396896362305, - "learning_rate": 2.8568102924969208e-05, - "loss": 0.657, - "step": 145460 - }, - { - "epoch": 1.2860022277621599, - "grad_norm": 2.78837251663208, - "learning_rate": 2.8566629537297333e-05, - "loss": 0.6428, - "step": 145470 - }, - { - "epoch": 1.2860906310224722, - "grad_norm": 11.340784072875977, - "learning_rate": 2.8565156149625465e-05, - "loss": 0.7063, - "step": 145480 - }, - { - "epoch": 1.2861790342827844, - "grad_norm": 9.237870216369629, - "learning_rate": 2.8563682761953597e-05, - "loss": 0.596, - "step": 145490 - }, - { - "epoch": 1.2862674375430965, - "grad_norm": 2.3367531299591064, - "learning_rate": 2.856220937428172e-05, - "loss": 0.5436, - "step": 145500 - }, - { - "epoch": 1.2863558408034088, - "grad_norm": 2.500900983810425, - "learning_rate": 2.8560735986609853e-05, - "loss": 0.5673, - "step": 145510 - }, - { - "epoch": 1.2864442440637212, - "grad_norm": 1.7571214437484741, - "learning_rate": 2.8559262598937985e-05, - "loss": 0.5923, - "step": 145520 - }, - { - "epoch": 1.2865326473240333, - "grad_norm": 2.621816396713257, - "learning_rate": 2.855778921126611e-05, - "loss": 0.6661, - "step": 145530 - }, - { - "epoch": 1.2866210505843456, - "grad_norm": 1.5630762577056885, - "learning_rate": 2.8556315823594242e-05, - "loss": 0.6048, - "step": 145540 - }, - { - "epoch": 1.2867094538446577, - "grad_norm": 8.00423812866211, - "learning_rate": 2.8554842435922374e-05, - "loss": 0.6142, - "step": 145550 - }, - { - "epoch": 1.28679785710497, - "grad_norm": 1.4410505294799805, - "learning_rate": 2.85533690482505e-05, - "loss": 0.5269, - "step": 145560 - }, - { - "epoch": 1.2868862603652822, - "grad_norm": 3.0488429069519043, - "learning_rate": 2.855189566057863e-05, - "loss": 0.5433, - "step": 145570 - }, - { - "epoch": 1.2869746636255945, - "grad_norm": 6.676955223083496, - "learning_rate": 2.855042227290676e-05, - "loss": 0.5935, - "step": 145580 - }, - { - "epoch": 1.2870630668859069, - "grad_norm": 4.665261745452881, - "learning_rate": 2.8548948885234887e-05, - "loss": 0.6498, - "step": 145590 - }, - { - "epoch": 1.287151470146219, - "grad_norm": 3.844132661819458, - "learning_rate": 2.854747549756302e-05, - "loss": 0.6117, - "step": 145600 - }, - { - "epoch": 1.2872398734065311, - "grad_norm": 7.462503433227539, - "learning_rate": 2.8546002109891147e-05, - "loss": 0.5184, - "step": 145610 - }, - { - "epoch": 1.2873282766668435, - "grad_norm": 2.0592591762542725, - "learning_rate": 2.8544528722219275e-05, - "loss": 0.5423, - "step": 145620 - }, - { - "epoch": 1.2874166799271558, - "grad_norm": 7.324609279632568, - "learning_rate": 2.8543055334547407e-05, - "loss": 0.613, - "step": 145630 - }, - { - "epoch": 1.287505083187468, - "grad_norm": 1.4013365507125854, - "learning_rate": 2.8541581946875536e-05, - "loss": 0.7144, - "step": 145640 - }, - { - "epoch": 1.2875934864477803, - "grad_norm": 7.762128829956055, - "learning_rate": 2.8540108559203664e-05, - "loss": 0.6142, - "step": 145650 - }, - { - "epoch": 1.2876818897080924, - "grad_norm": 2.950435161590576, - "learning_rate": 2.8538635171531796e-05, - "loss": 0.7119, - "step": 145660 - }, - { - "epoch": 1.2877702929684047, - "grad_norm": 6.425849437713623, - "learning_rate": 2.8537161783859924e-05, - "loss": 0.6833, - "step": 145670 - }, - { - "epoch": 1.2878586962287168, - "grad_norm": 2.2536520957946777, - "learning_rate": 2.8535688396188052e-05, - "loss": 0.6732, - "step": 145680 - }, - { - "epoch": 1.2879470994890292, - "grad_norm": 16.938657760620117, - "learning_rate": 2.853421500851618e-05, - "loss": 0.6439, - "step": 145690 - }, - { - "epoch": 1.2880355027493415, - "grad_norm": 7.789825916290283, - "learning_rate": 2.8532741620844312e-05, - "loss": 0.6726, - "step": 145700 - }, - { - "epoch": 1.2881239060096537, - "grad_norm": 3.132148504257202, - "learning_rate": 2.853126823317244e-05, - "loss": 0.5345, - "step": 145710 - }, - { - "epoch": 1.2882123092699658, - "grad_norm": 2.9491019248962402, - "learning_rate": 2.852979484550057e-05, - "loss": 0.6191, - "step": 145720 - }, - { - "epoch": 1.2883007125302781, - "grad_norm": 0.851676881313324, - "learning_rate": 2.85283214578287e-05, - "loss": 0.6708, - "step": 145730 - }, - { - "epoch": 1.2883891157905905, - "grad_norm": 2.125684976577759, - "learning_rate": 2.852684807015683e-05, - "loss": 0.6749, - "step": 145740 - }, - { - "epoch": 1.2884775190509026, - "grad_norm": 7.701344013214111, - "learning_rate": 2.8525374682484958e-05, - "loss": 0.5242, - "step": 145750 - }, - { - "epoch": 1.288565922311215, - "grad_norm": 7.966821193695068, - "learning_rate": 2.852390129481309e-05, - "loss": 0.5904, - "step": 145760 - }, - { - "epoch": 1.288654325571527, - "grad_norm": 6.028903961181641, - "learning_rate": 2.8522427907141218e-05, - "loss": 0.554, - "step": 145770 - }, - { - "epoch": 1.2887427288318394, - "grad_norm": 1.3743746280670166, - "learning_rate": 2.8520954519469346e-05, - "loss": 0.6797, - "step": 145780 - }, - { - "epoch": 1.2888311320921515, - "grad_norm": 1.4279412031173706, - "learning_rate": 2.8519481131797478e-05, - "loss": 0.5223, - "step": 145790 - }, - { - "epoch": 1.2889195353524638, - "grad_norm": 2.4263710975646973, - "learning_rate": 2.8518007744125603e-05, - "loss": 0.5801, - "step": 145800 - }, - { - "epoch": 1.289007938612776, - "grad_norm": 7.836480617523193, - "learning_rate": 2.8516534356453735e-05, - "loss": 0.702, - "step": 145810 - }, - { - "epoch": 1.2890963418730883, - "grad_norm": 13.650886535644531, - "learning_rate": 2.8515060968781866e-05, - "loss": 0.5048, - "step": 145820 - }, - { - "epoch": 1.2891847451334004, - "grad_norm": 4.52866268157959, - "learning_rate": 2.851358758110999e-05, - "loss": 0.7947, - "step": 145830 - }, - { - "epoch": 1.2892731483937128, - "grad_norm": 6.007741928100586, - "learning_rate": 2.8512114193438123e-05, - "loss": 0.5939, - "step": 145840 - }, - { - "epoch": 1.289361551654025, - "grad_norm": 11.088205337524414, - "learning_rate": 2.8510640805766255e-05, - "loss": 0.5385, - "step": 145850 - }, - { - "epoch": 1.2894499549143372, - "grad_norm": 2.0562798976898193, - "learning_rate": 2.850916741809438e-05, - "loss": 0.6746, - "step": 145860 - }, - { - "epoch": 1.2895383581746493, - "grad_norm": 1.281401515007019, - "learning_rate": 2.850769403042251e-05, - "loss": 0.4941, - "step": 145870 - }, - { - "epoch": 1.2896267614349617, - "grad_norm": 7.045774459838867, - "learning_rate": 2.8506220642750643e-05, - "loss": 0.5254, - "step": 145880 - }, - { - "epoch": 1.289715164695274, - "grad_norm": 4.512178421020508, - "learning_rate": 2.8504747255078768e-05, - "loss": 0.5488, - "step": 145890 - }, - { - "epoch": 1.2898035679555861, - "grad_norm": 2.099147319793701, - "learning_rate": 2.85032738674069e-05, - "loss": 0.6524, - "step": 145900 - }, - { - "epoch": 1.2898919712158985, - "grad_norm": 3.3113958835601807, - "learning_rate": 2.8501800479735025e-05, - "loss": 0.6098, - "step": 145910 - }, - { - "epoch": 1.2899803744762106, - "grad_norm": 4.148066997528076, - "learning_rate": 2.8500327092063157e-05, - "loss": 0.5473, - "step": 145920 - }, - { - "epoch": 1.290068777736523, - "grad_norm": 2.951728582382202, - "learning_rate": 2.849885370439129e-05, - "loss": 0.6273, - "step": 145930 - }, - { - "epoch": 1.290157180996835, - "grad_norm": 2.621436595916748, - "learning_rate": 2.8497380316719413e-05, - "loss": 0.6373, - "step": 145940 - }, - { - "epoch": 1.2902455842571474, - "grad_norm": 1.4980254173278809, - "learning_rate": 2.8495906929047545e-05, - "loss": 0.5354, - "step": 145950 - }, - { - "epoch": 1.2903339875174598, - "grad_norm": 5.7778239250183105, - "learning_rate": 2.8494433541375677e-05, - "loss": 0.586, - "step": 145960 - }, - { - "epoch": 1.2904223907777719, - "grad_norm": 1.3316553831100464, - "learning_rate": 2.8492960153703802e-05, - "loss": 0.6285, - "step": 145970 - }, - { - "epoch": 1.290510794038084, - "grad_norm": 5.821597099304199, - "learning_rate": 2.8491486766031934e-05, - "loss": 0.5095, - "step": 145980 - }, - { - "epoch": 1.2905991972983963, - "grad_norm": 1.9012861251831055, - "learning_rate": 2.8490013378360065e-05, - "loss": 0.5334, - "step": 145990 - }, - { - "epoch": 1.2906876005587087, - "grad_norm": 1.3801084756851196, - "learning_rate": 2.848853999068819e-05, - "loss": 0.6532, - "step": 146000 - }, - { - "epoch": 1.2907760038190208, - "grad_norm": 2.3948774337768555, - "learning_rate": 2.8487066603016322e-05, - "loss": 0.6381, - "step": 146010 - }, - { - "epoch": 1.2908644070793331, - "grad_norm": 1.9329140186309814, - "learning_rate": 2.8485593215344454e-05, - "loss": 0.6742, - "step": 146020 - }, - { - "epoch": 1.2909528103396453, - "grad_norm": 0.8544408679008484, - "learning_rate": 2.848411982767258e-05, - "loss": 0.5949, - "step": 146030 - }, - { - "epoch": 1.2910412135999576, - "grad_norm": 2.083409309387207, - "learning_rate": 2.848264644000071e-05, - "loss": 0.5571, - "step": 146040 - }, - { - "epoch": 1.2911296168602697, - "grad_norm": 1.5912474393844604, - "learning_rate": 2.8481173052328835e-05, - "loss": 0.46, - "step": 146050 - }, - { - "epoch": 1.291218020120582, - "grad_norm": 3.4063994884490967, - "learning_rate": 2.8479699664656967e-05, - "loss": 0.6153, - "step": 146060 - }, - { - "epoch": 1.2913064233808944, - "grad_norm": 4.82628870010376, - "learning_rate": 2.84782262769851e-05, - "loss": 0.4818, - "step": 146070 - }, - { - "epoch": 1.2913948266412065, - "grad_norm": 5.40212869644165, - "learning_rate": 2.8476752889313224e-05, - "loss": 0.6233, - "step": 146080 - }, - { - "epoch": 1.2914832299015186, - "grad_norm": 1.9495534896850586, - "learning_rate": 2.8475279501641356e-05, - "loss": 0.585, - "step": 146090 - }, - { - "epoch": 1.291571633161831, - "grad_norm": 6.7265305519104, - "learning_rate": 2.8473806113969487e-05, - "loss": 0.725, - "step": 146100 - }, - { - "epoch": 1.2916600364221433, - "grad_norm": 0.8092676997184753, - "learning_rate": 2.8472332726297612e-05, - "loss": 0.5393, - "step": 146110 - }, - { - "epoch": 1.2917484396824555, - "grad_norm": 2.290491819381714, - "learning_rate": 2.8470859338625744e-05, - "loss": 0.4181, - "step": 146120 - }, - { - "epoch": 1.2918368429427678, - "grad_norm": 8.55141830444336, - "learning_rate": 2.8469385950953876e-05, - "loss": 0.7677, - "step": 146130 - }, - { - "epoch": 1.29192524620308, - "grad_norm": 2.789681911468506, - "learning_rate": 2.8467912563282e-05, - "loss": 0.5504, - "step": 146140 - }, - { - "epoch": 1.2920136494633923, - "grad_norm": 7.410167217254639, - "learning_rate": 2.8466439175610132e-05, - "loss": 0.6975, - "step": 146150 - }, - { - "epoch": 1.2921020527237044, - "grad_norm": 6.615194797515869, - "learning_rate": 2.8464965787938257e-05, - "loss": 0.671, - "step": 146160 - }, - { - "epoch": 1.2921904559840167, - "grad_norm": 1.2965937852859497, - "learning_rate": 2.846349240026639e-05, - "loss": 0.4572, - "step": 146170 - }, - { - "epoch": 1.292278859244329, - "grad_norm": 8.206915855407715, - "learning_rate": 2.846201901259452e-05, - "loss": 0.5175, - "step": 146180 - }, - { - "epoch": 1.2923672625046412, - "grad_norm": 6.692469120025635, - "learning_rate": 2.8460545624922646e-05, - "loss": 0.5762, - "step": 146190 - }, - { - "epoch": 1.2924556657649533, - "grad_norm": 5.608756065368652, - "learning_rate": 2.8459072237250778e-05, - "loss": 0.5313, - "step": 146200 - }, - { - "epoch": 1.2925440690252656, - "grad_norm": 1.3925907611846924, - "learning_rate": 2.845759884957891e-05, - "loss": 0.6933, - "step": 146210 - }, - { - "epoch": 1.292632472285578, - "grad_norm": 6.7219014167785645, - "learning_rate": 2.8456125461907034e-05, - "loss": 0.5021, - "step": 146220 - }, - { - "epoch": 1.29272087554589, - "grad_norm": 1.2453153133392334, - "learning_rate": 2.8454652074235166e-05, - "loss": 0.7405, - "step": 146230 - }, - { - "epoch": 1.2928092788062024, - "grad_norm": 2.447101593017578, - "learning_rate": 2.8453178686563298e-05, - "loss": 0.5882, - "step": 146240 - }, - { - "epoch": 1.2928976820665146, - "grad_norm": 3.116987466812134, - "learning_rate": 2.8451705298891423e-05, - "loss": 0.6017, - "step": 146250 - }, - { - "epoch": 1.292986085326827, - "grad_norm": 2.5585999488830566, - "learning_rate": 2.8450231911219555e-05, - "loss": 0.6149, - "step": 146260 - }, - { - "epoch": 1.293074488587139, - "grad_norm": 1.6837371587753296, - "learning_rate": 2.844875852354768e-05, - "loss": 0.5378, - "step": 146270 - }, - { - "epoch": 1.2931628918474514, - "grad_norm": 1.533752679824829, - "learning_rate": 2.844728513587581e-05, - "loss": 0.5095, - "step": 146280 - }, - { - "epoch": 1.2932512951077637, - "grad_norm": 3.9867093563079834, - "learning_rate": 2.8445811748203943e-05, - "loss": 0.6708, - "step": 146290 - }, - { - "epoch": 1.2933396983680758, - "grad_norm": 2.2079825401306152, - "learning_rate": 2.8444338360532068e-05, - "loss": 0.7006, - "step": 146300 - }, - { - "epoch": 1.293428101628388, - "grad_norm": 2.876032829284668, - "learning_rate": 2.84428649728602e-05, - "loss": 0.604, - "step": 146310 - }, - { - "epoch": 1.2935165048887003, - "grad_norm": 1.8037502765655518, - "learning_rate": 2.844139158518833e-05, - "loss": 0.6907, - "step": 146320 - }, - { - "epoch": 1.2936049081490126, - "grad_norm": 5.662286758422852, - "learning_rate": 2.8439918197516456e-05, - "loss": 0.5766, - "step": 146330 - }, - { - "epoch": 1.2936933114093248, - "grad_norm": 5.239223003387451, - "learning_rate": 2.8438444809844588e-05, - "loss": 0.5212, - "step": 146340 - }, - { - "epoch": 1.293781714669637, - "grad_norm": 1.3765106201171875, - "learning_rate": 2.843697142217272e-05, - "loss": 0.6878, - "step": 146350 - }, - { - "epoch": 1.2938701179299492, - "grad_norm": 3.2985055446624756, - "learning_rate": 2.8435498034500845e-05, - "loss": 0.6275, - "step": 146360 - }, - { - "epoch": 1.2939585211902616, - "grad_norm": 1.4100604057312012, - "learning_rate": 2.8434024646828977e-05, - "loss": 0.6671, - "step": 146370 - }, - { - "epoch": 1.2940469244505737, - "grad_norm": 4.344699382781982, - "learning_rate": 2.84325512591571e-05, - "loss": 0.6651, - "step": 146380 - }, - { - "epoch": 1.294135327710886, - "grad_norm": 5.090414047241211, - "learning_rate": 2.8431077871485233e-05, - "loss": 0.6956, - "step": 146390 - }, - { - "epoch": 1.2942237309711981, - "grad_norm": 1.7224764823913574, - "learning_rate": 2.8429604483813365e-05, - "loss": 0.5881, - "step": 146400 - }, - { - "epoch": 1.2943121342315105, - "grad_norm": 3.1565380096435547, - "learning_rate": 2.842813109614149e-05, - "loss": 0.7066, - "step": 146410 - }, - { - "epoch": 1.2944005374918226, - "grad_norm": 8.755756378173828, - "learning_rate": 2.8426657708469622e-05, - "loss": 0.65, - "step": 146420 - }, - { - "epoch": 1.294488940752135, - "grad_norm": 1.3175544738769531, - "learning_rate": 2.8425184320797753e-05, - "loss": 0.5405, - "step": 146430 - }, - { - "epoch": 1.2945773440124473, - "grad_norm": 1.4170767068862915, - "learning_rate": 2.842371093312588e-05, - "loss": 0.6276, - "step": 146440 - }, - { - "epoch": 1.2946657472727594, - "grad_norm": 2.0644989013671875, - "learning_rate": 2.842223754545401e-05, - "loss": 0.5847, - "step": 146450 - }, - { - "epoch": 1.2947541505330715, - "grad_norm": 3.948702812194824, - "learning_rate": 2.8420764157782142e-05, - "loss": 0.6221, - "step": 146460 - }, - { - "epoch": 1.2948425537933839, - "grad_norm": 4.97209358215332, - "learning_rate": 2.8419290770110267e-05, - "loss": 0.6305, - "step": 146470 - }, - { - "epoch": 1.2949309570536962, - "grad_norm": 2.394826889038086, - "learning_rate": 2.84178173824384e-05, - "loss": 0.6793, - "step": 146480 - }, - { - "epoch": 1.2950193603140083, - "grad_norm": 1.4477965831756592, - "learning_rate": 2.841634399476653e-05, - "loss": 0.7527, - "step": 146490 - }, - { - "epoch": 1.2951077635743207, - "grad_norm": 2.1424484252929688, - "learning_rate": 2.8414870607094655e-05, - "loss": 0.5579, - "step": 146500 - }, - { - "epoch": 1.2951961668346328, - "grad_norm": 5.241677284240723, - "learning_rate": 2.8413397219422787e-05, - "loss": 0.5501, - "step": 146510 - }, - { - "epoch": 1.2952845700949451, - "grad_norm": 1.9471867084503174, - "learning_rate": 2.8411923831750915e-05, - "loss": 0.6399, - "step": 146520 - }, - { - "epoch": 1.2953729733552573, - "grad_norm": 10.179215431213379, - "learning_rate": 2.8410450444079044e-05, - "loss": 0.5723, - "step": 146530 - }, - { - "epoch": 1.2954613766155696, - "grad_norm": 2.3967907428741455, - "learning_rate": 2.8408977056407176e-05, - "loss": 0.4262, - "step": 146540 - }, - { - "epoch": 1.295549779875882, - "grad_norm": 1.2617415189743042, - "learning_rate": 2.8407503668735304e-05, - "loss": 0.4513, - "step": 146550 - }, - { - "epoch": 1.295638183136194, - "grad_norm": 1.8135316371917725, - "learning_rate": 2.8406030281063432e-05, - "loss": 0.5743, - "step": 146560 - }, - { - "epoch": 1.2957265863965062, - "grad_norm": 2.815929651260376, - "learning_rate": 2.8404556893391564e-05, - "loss": 0.7244, - "step": 146570 - }, - { - "epoch": 1.2958149896568185, - "grad_norm": 2.680691957473755, - "learning_rate": 2.8403083505719692e-05, - "loss": 0.6356, - "step": 146580 - }, - { - "epoch": 1.2959033929171309, - "grad_norm": 2.231788396835327, - "learning_rate": 2.840161011804782e-05, - "loss": 0.5356, - "step": 146590 - }, - { - "epoch": 1.295991796177443, - "grad_norm": 1.815050721168518, - "learning_rate": 2.8400136730375952e-05, - "loss": 0.5368, - "step": 146600 - }, - { - "epoch": 1.2960801994377553, - "grad_norm": 1.2623122930526733, - "learning_rate": 2.839866334270408e-05, - "loss": 0.7523, - "step": 146610 - }, - { - "epoch": 1.2961686026980674, - "grad_norm": 1.5876820087432861, - "learning_rate": 2.839718995503221e-05, - "loss": 0.7272, - "step": 146620 - }, - { - "epoch": 1.2962570059583798, - "grad_norm": 1.8999567031860352, - "learning_rate": 2.8395716567360337e-05, - "loss": 0.6806, - "step": 146630 - }, - { - "epoch": 1.296345409218692, - "grad_norm": 4.445340156555176, - "learning_rate": 2.839424317968847e-05, - "loss": 0.5926, - "step": 146640 - }, - { - "epoch": 1.2964338124790042, - "grad_norm": 19.329952239990234, - "learning_rate": 2.8392769792016598e-05, - "loss": 0.6271, - "step": 146650 - }, - { - "epoch": 1.2965222157393166, - "grad_norm": 1.148324966430664, - "learning_rate": 2.8391296404344726e-05, - "loss": 0.5235, - "step": 146660 - }, - { - "epoch": 1.2966106189996287, - "grad_norm": 2.467998743057251, - "learning_rate": 2.8389823016672858e-05, - "loss": 0.5469, - "step": 146670 - }, - { - "epoch": 1.2966990222599408, - "grad_norm": 1.8557848930358887, - "learning_rate": 2.8388349629000986e-05, - "loss": 0.6129, - "step": 146680 - }, - { - "epoch": 1.2967874255202532, - "grad_norm": 8.01926326751709, - "learning_rate": 2.8386876241329114e-05, - "loss": 0.7484, - "step": 146690 - }, - { - "epoch": 1.2968758287805655, - "grad_norm": 5.732945442199707, - "learning_rate": 2.8385402853657246e-05, - "loss": 0.5079, - "step": 146700 - }, - { - "epoch": 1.2969642320408776, - "grad_norm": 2.4821481704711914, - "learning_rate": 2.8383929465985374e-05, - "loss": 0.6717, - "step": 146710 - }, - { - "epoch": 1.29705263530119, - "grad_norm": 6.604349613189697, - "learning_rate": 2.8382456078313503e-05, - "loss": 0.6876, - "step": 146720 - }, - { - "epoch": 1.297141038561502, - "grad_norm": 2.479754686355591, - "learning_rate": 2.8380982690641635e-05, - "loss": 0.5344, - "step": 146730 - }, - { - "epoch": 1.2972294418218144, - "grad_norm": 3.167471170425415, - "learning_rate": 2.837950930296976e-05, - "loss": 0.5937, - "step": 146740 - }, - { - "epoch": 1.2973178450821266, - "grad_norm": 2.060343027114868, - "learning_rate": 2.837803591529789e-05, - "loss": 0.5797, - "step": 146750 - }, - { - "epoch": 1.297406248342439, - "grad_norm": 4.025713920593262, - "learning_rate": 2.8376562527626023e-05, - "loss": 0.6287, - "step": 146760 - }, - { - "epoch": 1.2974946516027512, - "grad_norm": 6.159024715423584, - "learning_rate": 2.8375089139954148e-05, - "loss": 0.689, - "step": 146770 - }, - { - "epoch": 1.2975830548630634, - "grad_norm": 1.5408360958099365, - "learning_rate": 2.837361575228228e-05, - "loss": 0.505, - "step": 146780 - }, - { - "epoch": 1.2976714581233755, - "grad_norm": 2.154221534729004, - "learning_rate": 2.837214236461041e-05, - "loss": 0.6672, - "step": 146790 - }, - { - "epoch": 1.2977598613836878, - "grad_norm": 8.126728057861328, - "learning_rate": 2.8370668976938536e-05, - "loss": 0.6237, - "step": 146800 - }, - { - "epoch": 1.2978482646440002, - "grad_norm": 7.023776054382324, - "learning_rate": 2.8369195589266668e-05, - "loss": 0.4704, - "step": 146810 - }, - { - "epoch": 1.2979366679043123, - "grad_norm": 1.201236367225647, - "learning_rate": 2.83677222015948e-05, - "loss": 0.4981, - "step": 146820 - }, - { - "epoch": 1.2980250711646246, - "grad_norm": 5.6378655433654785, - "learning_rate": 2.8366248813922925e-05, - "loss": 0.675, - "step": 146830 - }, - { - "epoch": 1.2981134744249367, - "grad_norm": 6.840307712554932, - "learning_rate": 2.8364775426251057e-05, - "loss": 0.7116, - "step": 146840 - }, - { - "epoch": 1.298201877685249, - "grad_norm": 6.086009502410889, - "learning_rate": 2.836330203857918e-05, - "loss": 0.5584, - "step": 146850 - }, - { - "epoch": 1.2982902809455612, - "grad_norm": 3.1375789642333984, - "learning_rate": 2.8361828650907313e-05, - "loss": 0.4644, - "step": 146860 - }, - { - "epoch": 1.2983786842058735, - "grad_norm": 2.1103127002716064, - "learning_rate": 2.8360355263235445e-05, - "loss": 0.5786, - "step": 146870 - }, - { - "epoch": 1.2984670874661859, - "grad_norm": 7.147364139556885, - "learning_rate": 2.835888187556357e-05, - "loss": 0.5104, - "step": 146880 - }, - { - "epoch": 1.298555490726498, - "grad_norm": 1.9633492231369019, - "learning_rate": 2.8357408487891702e-05, - "loss": 0.6729, - "step": 146890 - }, - { - "epoch": 1.2986438939868101, - "grad_norm": 3.0465798377990723, - "learning_rate": 2.8355935100219834e-05, - "loss": 0.5347, - "step": 146900 - }, - { - "epoch": 1.2987322972471225, - "grad_norm": 2.7077319622039795, - "learning_rate": 2.835446171254796e-05, - "loss": 0.5477, - "step": 146910 - }, - { - "epoch": 1.2988207005074348, - "grad_norm": 1.6707226037979126, - "learning_rate": 2.835298832487609e-05, - "loss": 0.6702, - "step": 146920 - }, - { - "epoch": 1.298909103767747, - "grad_norm": 4.2694573402404785, - "learning_rate": 2.8351514937204222e-05, - "loss": 0.7365, - "step": 146930 - }, - { - "epoch": 1.2989975070280593, - "grad_norm": 2.267035961151123, - "learning_rate": 2.8350041549532347e-05, - "loss": 0.6377, - "step": 146940 - }, - { - "epoch": 1.2990859102883714, - "grad_norm": 1.021494746208191, - "learning_rate": 2.834856816186048e-05, - "loss": 0.5388, - "step": 146950 - }, - { - "epoch": 1.2991743135486837, - "grad_norm": 6.940701484680176, - "learning_rate": 2.834709477418861e-05, - "loss": 0.59, - "step": 146960 - }, - { - "epoch": 1.2992627168089959, - "grad_norm": 2.500112771987915, - "learning_rate": 2.8345621386516735e-05, - "loss": 0.5426, - "step": 146970 - }, - { - "epoch": 1.2993511200693082, - "grad_norm": 1.5883054733276367, - "learning_rate": 2.8344147998844867e-05, - "loss": 0.6212, - "step": 146980 - }, - { - "epoch": 1.2994395233296203, - "grad_norm": 2.620814800262451, - "learning_rate": 2.8342674611172992e-05, - "loss": 0.6575, - "step": 146990 - }, - { - "epoch": 1.2995279265899327, - "grad_norm": 2.0864148139953613, - "learning_rate": 2.8341201223501124e-05, - "loss": 0.5719, - "step": 147000 - }, - { - "epoch": 1.2996163298502448, - "grad_norm": 1.3372048139572144, - "learning_rate": 2.8339727835829256e-05, - "loss": 0.6043, - "step": 147010 - }, - { - "epoch": 1.2997047331105571, - "grad_norm": 2.6868886947631836, - "learning_rate": 2.833825444815738e-05, - "loss": 0.5885, - "step": 147020 - }, - { - "epoch": 1.2997931363708695, - "grad_norm": 1.4888969659805298, - "learning_rate": 2.8336781060485512e-05, - "loss": 0.5749, - "step": 147030 - }, - { - "epoch": 1.2998815396311816, - "grad_norm": 2.819533109664917, - "learning_rate": 2.8335307672813644e-05, - "loss": 0.5596, - "step": 147040 - }, - { - "epoch": 1.2999699428914937, - "grad_norm": 1.919136643409729, - "learning_rate": 2.833383428514177e-05, - "loss": 0.5749, - "step": 147050 - }, - { - "epoch": 1.300058346151806, - "grad_norm": 5.6132917404174805, - "learning_rate": 2.83323608974699e-05, - "loss": 0.603, - "step": 147060 - }, - { - "epoch": 1.3001467494121184, - "grad_norm": 12.341681480407715, - "learning_rate": 2.8330887509798033e-05, - "loss": 0.702, - "step": 147070 - }, - { - "epoch": 1.3002351526724305, - "grad_norm": 11.892692565917969, - "learning_rate": 2.8329414122126157e-05, - "loss": 0.5466, - "step": 147080 - }, - { - "epoch": 1.3003235559327428, - "grad_norm": 2.8739593029022217, - "learning_rate": 2.832794073445429e-05, - "loss": 0.5766, - "step": 147090 - }, - { - "epoch": 1.300411959193055, - "grad_norm": 8.932125091552734, - "learning_rate": 2.8326467346782414e-05, - "loss": 0.6745, - "step": 147100 - }, - { - "epoch": 1.3005003624533673, - "grad_norm": 2.2936980724334717, - "learning_rate": 2.8324993959110546e-05, - "loss": 0.5872, - "step": 147110 - }, - { - "epoch": 1.3005887657136794, - "grad_norm": 2.2789089679718018, - "learning_rate": 2.8323520571438678e-05, - "loss": 0.5136, - "step": 147120 - }, - { - "epoch": 1.3006771689739918, - "grad_norm": 3.512072801589966, - "learning_rate": 2.8322047183766803e-05, - "loss": 0.5661, - "step": 147130 - }, - { - "epoch": 1.3007655722343041, - "grad_norm": 2.171337604522705, - "learning_rate": 2.8320573796094934e-05, - "loss": 0.5836, - "step": 147140 - }, - { - "epoch": 1.3008539754946162, - "grad_norm": 2.246044635772705, - "learning_rate": 2.8319100408423066e-05, - "loss": 0.5959, - "step": 147150 - }, - { - "epoch": 1.3009423787549284, - "grad_norm": 1.1269278526306152, - "learning_rate": 2.831762702075119e-05, - "loss": 0.5188, - "step": 147160 - }, - { - "epoch": 1.3010307820152407, - "grad_norm": 3.1443018913269043, - "learning_rate": 2.8316153633079323e-05, - "loss": 0.5548, - "step": 147170 - }, - { - "epoch": 1.301119185275553, - "grad_norm": 4.107034683227539, - "learning_rate": 2.8314680245407455e-05, - "loss": 0.7024, - "step": 147180 - }, - { - "epoch": 1.3012075885358652, - "grad_norm": 0.7410919666290283, - "learning_rate": 2.831320685773558e-05, - "loss": 0.5675, - "step": 147190 - }, - { - "epoch": 1.3012959917961775, - "grad_norm": 2.4453177452087402, - "learning_rate": 2.831173347006371e-05, - "loss": 0.646, - "step": 147200 - }, - { - "epoch": 1.3013843950564896, - "grad_norm": 3.1775753498077393, - "learning_rate": 2.8310260082391836e-05, - "loss": 0.5698, - "step": 147210 - }, - { - "epoch": 1.301472798316802, - "grad_norm": 3.8935375213623047, - "learning_rate": 2.8308786694719968e-05, - "loss": 0.581, - "step": 147220 - }, - { - "epoch": 1.301561201577114, - "grad_norm": 1.562652587890625, - "learning_rate": 2.83073133070481e-05, - "loss": 0.6022, - "step": 147230 - }, - { - "epoch": 1.3016496048374264, - "grad_norm": 9.213784217834473, - "learning_rate": 2.8305839919376225e-05, - "loss": 0.6211, - "step": 147240 - }, - { - "epoch": 1.3017380080977388, - "grad_norm": 2.685655117034912, - "learning_rate": 2.8304366531704356e-05, - "loss": 0.5941, - "step": 147250 - }, - { - "epoch": 1.3018264113580509, - "grad_norm": 1.4757558107376099, - "learning_rate": 2.8302893144032488e-05, - "loss": 0.508, - "step": 147260 - }, - { - "epoch": 1.301914814618363, - "grad_norm": 1.7871500253677368, - "learning_rate": 2.8301419756360613e-05, - "loss": 0.7016, - "step": 147270 - }, - { - "epoch": 1.3020032178786753, - "grad_norm": 1.066924810409546, - "learning_rate": 2.8299946368688745e-05, - "loss": 0.6228, - "step": 147280 - }, - { - "epoch": 1.3020916211389877, - "grad_norm": 2.3574273586273193, - "learning_rate": 2.8298472981016877e-05, - "loss": 0.6102, - "step": 147290 - }, - { - "epoch": 1.3021800243992998, - "grad_norm": 8.243714332580566, - "learning_rate": 2.8296999593345e-05, - "loss": 0.6085, - "step": 147300 - }, - { - "epoch": 1.3022684276596121, - "grad_norm": 2.1952390670776367, - "learning_rate": 2.8295526205673133e-05, - "loss": 0.5912, - "step": 147310 - }, - { - "epoch": 1.3023568309199243, - "grad_norm": 2.745427370071411, - "learning_rate": 2.8294052818001258e-05, - "loss": 0.6867, - "step": 147320 - }, - { - "epoch": 1.3024452341802366, - "grad_norm": 1.7434247732162476, - "learning_rate": 2.829257943032939e-05, - "loss": 0.6049, - "step": 147330 - }, - { - "epoch": 1.3025336374405487, - "grad_norm": 2.7690441608428955, - "learning_rate": 2.8291106042657522e-05, - "loss": 0.6213, - "step": 147340 - }, - { - "epoch": 1.302622040700861, - "grad_norm": 3.6132118701934814, - "learning_rate": 2.8289632654985647e-05, - "loss": 0.6111, - "step": 147350 - }, - { - "epoch": 1.3027104439611734, - "grad_norm": 1.6341034173965454, - "learning_rate": 2.828815926731378e-05, - "loss": 0.5975, - "step": 147360 - }, - { - "epoch": 1.3027988472214855, - "grad_norm": 5.649781703948975, - "learning_rate": 2.828668587964191e-05, - "loss": 0.6634, - "step": 147370 - }, - { - "epoch": 1.3028872504817977, - "grad_norm": 1.2415521144866943, - "learning_rate": 2.8285212491970035e-05, - "loss": 0.4889, - "step": 147380 - }, - { - "epoch": 1.30297565374211, - "grad_norm": 2.9956023693084717, - "learning_rate": 2.8283739104298167e-05, - "loss": 0.5689, - "step": 147390 - }, - { - "epoch": 1.3030640570024223, - "grad_norm": 2.027679681777954, - "learning_rate": 2.82822657166263e-05, - "loss": 0.5782, - "step": 147400 - }, - { - "epoch": 1.3031524602627345, - "grad_norm": 8.587875366210938, - "learning_rate": 2.8280792328954424e-05, - "loss": 0.5384, - "step": 147410 - }, - { - "epoch": 1.3032408635230468, - "grad_norm": 2.378728151321411, - "learning_rate": 2.8279318941282555e-05, - "loss": 0.5795, - "step": 147420 - }, - { - "epoch": 1.303329266783359, - "grad_norm": 2.8450756072998047, - "learning_rate": 2.8277845553610687e-05, - "loss": 0.5438, - "step": 147430 - }, - { - "epoch": 1.3034176700436713, - "grad_norm": 4.5527496337890625, - "learning_rate": 2.8276372165938812e-05, - "loss": 0.6895, - "step": 147440 - }, - { - "epoch": 1.3035060733039834, - "grad_norm": 3.694490671157837, - "learning_rate": 2.8274898778266944e-05, - "loss": 0.6177, - "step": 147450 - }, - { - "epoch": 1.3035944765642957, - "grad_norm": 1.7594057321548462, - "learning_rate": 2.8273425390595072e-05, - "loss": 0.5451, - "step": 147460 - }, - { - "epoch": 1.303682879824608, - "grad_norm": 2.53791880607605, - "learning_rate": 2.82719520029232e-05, - "loss": 0.4787, - "step": 147470 - }, - { - "epoch": 1.3037712830849202, - "grad_norm": 5.925833225250244, - "learning_rate": 2.8270478615251332e-05, - "loss": 0.6858, - "step": 147480 - }, - { - "epoch": 1.3038596863452323, - "grad_norm": 2.1145951747894287, - "learning_rate": 2.826900522757946e-05, - "loss": 0.6803, - "step": 147490 - }, - { - "epoch": 1.3039480896055446, - "grad_norm": 2.356001615524292, - "learning_rate": 2.826753183990759e-05, - "loss": 0.5678, - "step": 147500 - }, - { - "epoch": 1.304036492865857, - "grad_norm": 0.8765555620193481, - "learning_rate": 2.826605845223572e-05, - "loss": 0.5352, - "step": 147510 - }, - { - "epoch": 1.304124896126169, - "grad_norm": 2.0448122024536133, - "learning_rate": 2.826458506456385e-05, - "loss": 0.5859, - "step": 147520 - }, - { - "epoch": 1.3042132993864815, - "grad_norm": 3.7324912548065186, - "learning_rate": 2.8263111676891977e-05, - "loss": 0.7037, - "step": 147530 - }, - { - "epoch": 1.3043017026467936, - "grad_norm": 6.3869147300720215, - "learning_rate": 2.826163828922011e-05, - "loss": 0.5142, - "step": 147540 - }, - { - "epoch": 1.304390105907106, - "grad_norm": 3.6988437175750732, - "learning_rate": 2.8260164901548238e-05, - "loss": 0.5108, - "step": 147550 - }, - { - "epoch": 1.304478509167418, - "grad_norm": 3.6429684162139893, - "learning_rate": 2.8258691513876366e-05, - "loss": 0.6565, - "step": 147560 - }, - { - "epoch": 1.3045669124277304, - "grad_norm": 2.3001108169555664, - "learning_rate": 2.8257218126204494e-05, - "loss": 0.5487, - "step": 147570 - }, - { - "epoch": 1.3046553156880425, - "grad_norm": 2.0703392028808594, - "learning_rate": 2.8255744738532626e-05, - "loss": 0.6662, - "step": 147580 - }, - { - "epoch": 1.3047437189483548, - "grad_norm": 4.685385704040527, - "learning_rate": 2.8254271350860754e-05, - "loss": 0.6427, - "step": 147590 - }, - { - "epoch": 1.304832122208667, - "grad_norm": 2.031373977661133, - "learning_rate": 2.8252797963188883e-05, - "loss": 0.7154, - "step": 147600 - }, - { - "epoch": 1.3049205254689793, - "grad_norm": 5.000436782836914, - "learning_rate": 2.8251324575517014e-05, - "loss": 0.6124, - "step": 147610 - }, - { - "epoch": 1.3050089287292916, - "grad_norm": 2.333563804626465, - "learning_rate": 2.8249851187845143e-05, - "loss": 0.6567, - "step": 147620 - }, - { - "epoch": 1.3050973319896038, - "grad_norm": 1.7319918870925903, - "learning_rate": 2.824837780017327e-05, - "loss": 0.5404, - "step": 147630 - }, - { - "epoch": 1.3051857352499159, - "grad_norm": 1.6542637348175049, - "learning_rate": 2.8246904412501403e-05, - "loss": 0.6069, - "step": 147640 - }, - { - "epoch": 1.3052741385102282, - "grad_norm": 8.284990310668945, - "learning_rate": 2.824543102482953e-05, - "loss": 0.5799, - "step": 147650 - }, - { - "epoch": 1.3053625417705406, - "grad_norm": 2.2346031665802, - "learning_rate": 2.824395763715766e-05, - "loss": 0.5605, - "step": 147660 - }, - { - "epoch": 1.3054509450308527, - "grad_norm": 12.373358726501465, - "learning_rate": 2.824248424948579e-05, - "loss": 0.5377, - "step": 147670 - }, - { - "epoch": 1.305539348291165, - "grad_norm": 2.6278672218322754, - "learning_rate": 2.8241010861813916e-05, - "loss": 0.5863, - "step": 147680 - }, - { - "epoch": 1.3056277515514771, - "grad_norm": 1.561033010482788, - "learning_rate": 2.8239537474142048e-05, - "loss": 0.6042, - "step": 147690 - }, - { - "epoch": 1.3057161548117895, - "grad_norm": 4.284996032714844, - "learning_rate": 2.823806408647018e-05, - "loss": 0.6056, - "step": 147700 - }, - { - "epoch": 1.3058045580721016, - "grad_norm": 1.2217775583267212, - "learning_rate": 2.8236590698798305e-05, - "loss": 0.5429, - "step": 147710 - }, - { - "epoch": 1.305892961332414, - "grad_norm": 1.2879341840744019, - "learning_rate": 2.8235117311126436e-05, - "loss": 0.5125, - "step": 147720 - }, - { - "epoch": 1.3059813645927263, - "grad_norm": 6.19607400894165, - "learning_rate": 2.8233643923454568e-05, - "loss": 0.5391, - "step": 147730 - }, - { - "epoch": 1.3060697678530384, - "grad_norm": 2.394674777984619, - "learning_rate": 2.8232170535782693e-05, - "loss": 0.5223, - "step": 147740 - }, - { - "epoch": 1.3061581711133505, - "grad_norm": 2.607071876525879, - "learning_rate": 2.8230697148110825e-05, - "loss": 0.5002, - "step": 147750 - }, - { - "epoch": 1.3062465743736629, - "grad_norm": 3.993256092071533, - "learning_rate": 2.8229223760438957e-05, - "loss": 0.6738, - "step": 147760 - }, - { - "epoch": 1.3063349776339752, - "grad_norm": 2.1710879802703857, - "learning_rate": 2.822775037276708e-05, - "loss": 0.5527, - "step": 147770 - }, - { - "epoch": 1.3064233808942873, - "grad_norm": 3.0856449604034424, - "learning_rate": 2.8226276985095213e-05, - "loss": 0.6058, - "step": 147780 - }, - { - "epoch": 1.3065117841545997, - "grad_norm": 1.846940040588379, - "learning_rate": 2.822480359742334e-05, - "loss": 0.6034, - "step": 147790 - }, - { - "epoch": 1.3066001874149118, - "grad_norm": 1.2857451438903809, - "learning_rate": 2.822333020975147e-05, - "loss": 0.5157, - "step": 147800 - }, - { - "epoch": 1.3066885906752241, - "grad_norm": 6.651855945587158, - "learning_rate": 2.8221856822079602e-05, - "loss": 0.7258, - "step": 147810 - }, - { - "epoch": 1.3067769939355363, - "grad_norm": 2.238550901412964, - "learning_rate": 2.8220383434407727e-05, - "loss": 0.4933, - "step": 147820 - }, - { - "epoch": 1.3068653971958486, - "grad_norm": 5.695883750915527, - "learning_rate": 2.821891004673586e-05, - "loss": 0.6126, - "step": 147830 - }, - { - "epoch": 1.306953800456161, - "grad_norm": 1.7693698406219482, - "learning_rate": 2.821743665906399e-05, - "loss": 0.5388, - "step": 147840 - }, - { - "epoch": 1.307042203716473, - "grad_norm": 2.918375015258789, - "learning_rate": 2.8215963271392115e-05, - "loss": 0.7853, - "step": 147850 - }, - { - "epoch": 1.3071306069767852, - "grad_norm": 1.3856827020645142, - "learning_rate": 2.8214489883720247e-05, - "loss": 0.6787, - "step": 147860 - }, - { - "epoch": 1.3072190102370975, - "grad_norm": 4.287835121154785, - "learning_rate": 2.821301649604838e-05, - "loss": 0.5841, - "step": 147870 - }, - { - "epoch": 1.3073074134974099, - "grad_norm": 5.601003170013428, - "learning_rate": 2.8211543108376504e-05, - "loss": 0.5665, - "step": 147880 - }, - { - "epoch": 1.307395816757722, - "grad_norm": 0.9920431971549988, - "learning_rate": 2.8210069720704635e-05, - "loss": 0.5784, - "step": 147890 - }, - { - "epoch": 1.3074842200180343, - "grad_norm": 1.321824073791504, - "learning_rate": 2.8208596333032767e-05, - "loss": 0.5604, - "step": 147900 - }, - { - "epoch": 1.3075726232783464, - "grad_norm": 2.0531821250915527, - "learning_rate": 2.8207122945360892e-05, - "loss": 0.6935, - "step": 147910 - }, - { - "epoch": 1.3076610265386588, - "grad_norm": 2.335388422012329, - "learning_rate": 2.8205649557689024e-05, - "loss": 0.6757, - "step": 147920 - }, - { - "epoch": 1.307749429798971, - "grad_norm": 2.0323305130004883, - "learning_rate": 2.820417617001715e-05, - "loss": 0.5602, - "step": 147930 - }, - { - "epoch": 1.3078378330592833, - "grad_norm": 2.154433250427246, - "learning_rate": 2.820270278234528e-05, - "loss": 0.585, - "step": 147940 - }, - { - "epoch": 1.3079262363195956, - "grad_norm": 7.445944309234619, - "learning_rate": 2.8201229394673412e-05, - "loss": 0.5075, - "step": 147950 - }, - { - "epoch": 1.3080146395799077, - "grad_norm": 2.845095157623291, - "learning_rate": 2.8199756007001537e-05, - "loss": 0.5999, - "step": 147960 - }, - { - "epoch": 1.3081030428402198, - "grad_norm": 3.5085434913635254, - "learning_rate": 2.819828261932967e-05, - "loss": 0.6902, - "step": 147970 - }, - { - "epoch": 1.3081914461005322, - "grad_norm": 1.5698795318603516, - "learning_rate": 2.81968092316578e-05, - "loss": 0.5557, - "step": 147980 - }, - { - "epoch": 1.3082798493608445, - "grad_norm": 2.0982470512390137, - "learning_rate": 2.8195335843985926e-05, - "loss": 0.598, - "step": 147990 - }, - { - "epoch": 1.3083682526211566, - "grad_norm": 5.38260555267334, - "learning_rate": 2.8193862456314058e-05, - "loss": 0.5967, - "step": 148000 - }, - { - "epoch": 1.308456655881469, - "grad_norm": 8.967691421508789, - "learning_rate": 2.819238906864219e-05, - "loss": 0.6884, - "step": 148010 - }, - { - "epoch": 1.308545059141781, - "grad_norm": 2.423182725906372, - "learning_rate": 2.8190915680970314e-05, - "loss": 0.5464, - "step": 148020 - }, - { - "epoch": 1.3086334624020934, - "grad_norm": 1.3502552509307861, - "learning_rate": 2.8189442293298446e-05, - "loss": 0.6193, - "step": 148030 - }, - { - "epoch": 1.3087218656624056, - "grad_norm": 1.9717388153076172, - "learning_rate": 2.818796890562657e-05, - "loss": 0.6013, - "step": 148040 - }, - { - "epoch": 1.308810268922718, - "grad_norm": 10.02807903289795, - "learning_rate": 2.8186495517954703e-05, - "loss": 0.5545, - "step": 148050 - }, - { - "epoch": 1.3088986721830302, - "grad_norm": 2.5539464950561523, - "learning_rate": 2.8185022130282834e-05, - "loss": 0.6676, - "step": 148060 - }, - { - "epoch": 1.3089870754433424, - "grad_norm": 11.810771942138672, - "learning_rate": 2.818354874261096e-05, - "loss": 0.7359, - "step": 148070 - }, - { - "epoch": 1.3090754787036545, - "grad_norm": 3.780825138092041, - "learning_rate": 2.818207535493909e-05, - "loss": 0.5766, - "step": 148080 - }, - { - "epoch": 1.3091638819639668, - "grad_norm": 2.876922369003296, - "learning_rate": 2.8180601967267223e-05, - "loss": 0.6735, - "step": 148090 - }, - { - "epoch": 1.3092522852242792, - "grad_norm": 8.37081241607666, - "learning_rate": 2.8179128579595348e-05, - "loss": 0.5598, - "step": 148100 - }, - { - "epoch": 1.3093406884845913, - "grad_norm": 2.4037575721740723, - "learning_rate": 2.817765519192348e-05, - "loss": 0.6646, - "step": 148110 - }, - { - "epoch": 1.3094290917449036, - "grad_norm": 1.8648908138275146, - "learning_rate": 2.817618180425161e-05, - "loss": 0.4849, - "step": 148120 - }, - { - "epoch": 1.3095174950052157, - "grad_norm": 12.503753662109375, - "learning_rate": 2.8174708416579736e-05, - "loss": 0.5475, - "step": 148130 - }, - { - "epoch": 1.309605898265528, - "grad_norm": 1.414025902748108, - "learning_rate": 2.8173235028907868e-05, - "loss": 0.6481, - "step": 148140 - }, - { - "epoch": 1.3096943015258402, - "grad_norm": 3.0190954208374023, - "learning_rate": 2.8171761641235993e-05, - "loss": 0.6249, - "step": 148150 - }, - { - "epoch": 1.3097827047861526, - "grad_norm": 3.135728120803833, - "learning_rate": 2.8170288253564125e-05, - "loss": 0.6229, - "step": 148160 - }, - { - "epoch": 1.3098711080464647, - "grad_norm": 3.5395970344543457, - "learning_rate": 2.8168814865892256e-05, - "loss": 0.6476, - "step": 148170 - }, - { - "epoch": 1.309959511306777, - "grad_norm": 3.3132314682006836, - "learning_rate": 2.816734147822038e-05, - "loss": 0.599, - "step": 148180 - }, - { - "epoch": 1.3100479145670891, - "grad_norm": 2.0649218559265137, - "learning_rate": 2.8165868090548513e-05, - "loss": 0.475, - "step": 148190 - }, - { - "epoch": 1.3101363178274015, - "grad_norm": 2.7086780071258545, - "learning_rate": 2.8164394702876645e-05, - "loss": 0.6042, - "step": 148200 - }, - { - "epoch": 1.3102247210877138, - "grad_norm": 2.1677048206329346, - "learning_rate": 2.816292131520477e-05, - "loss": 0.6134, - "step": 148210 - }, - { - "epoch": 1.310313124348026, - "grad_norm": 3.4466402530670166, - "learning_rate": 2.81614479275329e-05, - "loss": 0.6503, - "step": 148220 - }, - { - "epoch": 1.310401527608338, - "grad_norm": 2.27215576171875, - "learning_rate": 2.8159974539861033e-05, - "loss": 0.6277, - "step": 148230 - }, - { - "epoch": 1.3104899308686504, - "grad_norm": 1.3102741241455078, - "learning_rate": 2.815850115218916e-05, - "loss": 0.6983, - "step": 148240 - }, - { - "epoch": 1.3105783341289627, - "grad_norm": 6.3948540687561035, - "learning_rate": 2.815702776451729e-05, - "loss": 0.5522, - "step": 148250 - }, - { - "epoch": 1.3106667373892749, - "grad_norm": 4.992373943328857, - "learning_rate": 2.8155554376845415e-05, - "loss": 0.6298, - "step": 148260 - }, - { - "epoch": 1.3107551406495872, - "grad_norm": 3.187575340270996, - "learning_rate": 2.8154080989173547e-05, - "loss": 0.476, - "step": 148270 - }, - { - "epoch": 1.3108435439098993, - "grad_norm": 2.759039878845215, - "learning_rate": 2.815260760150168e-05, - "loss": 0.7035, - "step": 148280 - }, - { - "epoch": 1.3109319471702117, - "grad_norm": 5.789707183837891, - "learning_rate": 2.8151134213829803e-05, - "loss": 0.733, - "step": 148290 - }, - { - "epoch": 1.3110203504305238, - "grad_norm": 1.0783339738845825, - "learning_rate": 2.8149660826157935e-05, - "loss": 0.6591, - "step": 148300 - }, - { - "epoch": 1.3111087536908361, - "grad_norm": 2.655055046081543, - "learning_rate": 2.8148187438486067e-05, - "loss": 0.6063, - "step": 148310 - }, - { - "epoch": 1.3111971569511485, - "grad_norm": 3.78544020652771, - "learning_rate": 2.8146714050814192e-05, - "loss": 0.7063, - "step": 148320 - }, - { - "epoch": 1.3112855602114606, - "grad_norm": 4.102067470550537, - "learning_rate": 2.8145240663142324e-05, - "loss": 0.5678, - "step": 148330 - }, - { - "epoch": 1.3113739634717727, - "grad_norm": 1.2500070333480835, - "learning_rate": 2.8143767275470455e-05, - "loss": 0.5558, - "step": 148340 - }, - { - "epoch": 1.311462366732085, - "grad_norm": 0.7118691205978394, - "learning_rate": 2.814229388779858e-05, - "loss": 0.5641, - "step": 148350 - }, - { - "epoch": 1.3115507699923974, - "grad_norm": 2.446852207183838, - "learning_rate": 2.8140820500126712e-05, - "loss": 0.7724, - "step": 148360 - }, - { - "epoch": 1.3116391732527095, - "grad_norm": 2.7543790340423584, - "learning_rate": 2.8139347112454844e-05, - "loss": 0.6487, - "step": 148370 - }, - { - "epoch": 1.3117275765130219, - "grad_norm": 3.6966869831085205, - "learning_rate": 2.813787372478297e-05, - "loss": 0.5965, - "step": 148380 - }, - { - "epoch": 1.311815979773334, - "grad_norm": 3.447160243988037, - "learning_rate": 2.81364003371111e-05, - "loss": 0.5269, - "step": 148390 - }, - { - "epoch": 1.3119043830336463, - "grad_norm": 1.771708369255066, - "learning_rate": 2.813492694943923e-05, - "loss": 0.6768, - "step": 148400 - }, - { - "epoch": 1.3119927862939584, - "grad_norm": 0.8293766975402832, - "learning_rate": 2.8133453561767357e-05, - "loss": 0.57, - "step": 148410 - }, - { - "epoch": 1.3120811895542708, - "grad_norm": 1.6164108514785767, - "learning_rate": 2.813198017409549e-05, - "loss": 0.6959, - "step": 148420 - }, - { - "epoch": 1.3121695928145831, - "grad_norm": 4.570804595947266, - "learning_rate": 2.8130506786423617e-05, - "loss": 0.5991, - "step": 148430 - }, - { - "epoch": 1.3122579960748952, - "grad_norm": 2.8703620433807373, - "learning_rate": 2.8129033398751746e-05, - "loss": 0.7346, - "step": 148440 - }, - { - "epoch": 1.3123463993352074, - "grad_norm": 4.190461158752441, - "learning_rate": 2.8127560011079877e-05, - "loss": 0.5259, - "step": 148450 - }, - { - "epoch": 1.3124348025955197, - "grad_norm": 5.106935977935791, - "learning_rate": 2.8126086623408006e-05, - "loss": 0.6745, - "step": 148460 - }, - { - "epoch": 1.312523205855832, - "grad_norm": 1.2798593044281006, - "learning_rate": 2.8124613235736134e-05, - "loss": 0.6515, - "step": 148470 - }, - { - "epoch": 1.3126116091161442, - "grad_norm": 5.465696811676025, - "learning_rate": 2.8123139848064266e-05, - "loss": 0.6106, - "step": 148480 - }, - { - "epoch": 1.3127000123764565, - "grad_norm": 2.975137710571289, - "learning_rate": 2.8121666460392394e-05, - "loss": 0.541, - "step": 148490 - }, - { - "epoch": 1.3127884156367686, - "grad_norm": 1.5276206731796265, - "learning_rate": 2.8120193072720523e-05, - "loss": 0.6069, - "step": 148500 - }, - { - "epoch": 1.312876818897081, - "grad_norm": 10.58261775970459, - "learning_rate": 2.811871968504865e-05, - "loss": 0.559, - "step": 148510 - }, - { - "epoch": 1.312965222157393, - "grad_norm": 1.9151713848114014, - "learning_rate": 2.8117246297376783e-05, - "loss": 0.5856, - "step": 148520 - }, - { - "epoch": 1.3130536254177054, - "grad_norm": 2.3587288856506348, - "learning_rate": 2.811577290970491e-05, - "loss": 0.5826, - "step": 148530 - }, - { - "epoch": 1.3131420286780178, - "grad_norm": 2.363764524459839, - "learning_rate": 2.811429952203304e-05, - "loss": 0.6009, - "step": 148540 - }, - { - "epoch": 1.31323043193833, - "grad_norm": 4.240105628967285, - "learning_rate": 2.811282613436117e-05, - "loss": 0.6627, - "step": 148550 - }, - { - "epoch": 1.313318835198642, - "grad_norm": 1.2024303674697876, - "learning_rate": 2.81113527466893e-05, - "loss": 0.7069, - "step": 148560 - }, - { - "epoch": 1.3134072384589544, - "grad_norm": 4.543483734130859, - "learning_rate": 2.8109879359017428e-05, - "loss": 0.6394, - "step": 148570 - }, - { - "epoch": 1.3134956417192667, - "grad_norm": 1.2395130395889282, - "learning_rate": 2.810840597134556e-05, - "loss": 0.5426, - "step": 148580 - }, - { - "epoch": 1.3135840449795788, - "grad_norm": 1.3809128999710083, - "learning_rate": 2.8106932583673688e-05, - "loss": 0.7005, - "step": 148590 - }, - { - "epoch": 1.3136724482398912, - "grad_norm": 1.356872797012329, - "learning_rate": 2.8105459196001816e-05, - "loss": 0.5765, - "step": 148600 - }, - { - "epoch": 1.3137608515002033, - "grad_norm": 1.5789388418197632, - "learning_rate": 2.8103985808329948e-05, - "loss": 0.6714, - "step": 148610 - }, - { - "epoch": 1.3138492547605156, - "grad_norm": 2.0288944244384766, - "learning_rate": 2.8102512420658073e-05, - "loss": 0.597, - "step": 148620 - }, - { - "epoch": 1.3139376580208277, - "grad_norm": 1.8794320821762085, - "learning_rate": 2.8101039032986205e-05, - "loss": 0.5266, - "step": 148630 - }, - { - "epoch": 1.31402606128114, - "grad_norm": 1.0160455703735352, - "learning_rate": 2.8099565645314337e-05, - "loss": 0.6366, - "step": 148640 - }, - { - "epoch": 1.3141144645414524, - "grad_norm": 11.037582397460938, - "learning_rate": 2.809809225764246e-05, - "loss": 0.5294, - "step": 148650 - }, - { - "epoch": 1.3142028678017645, - "grad_norm": 3.0306789875030518, - "learning_rate": 2.8096618869970593e-05, - "loss": 0.6022, - "step": 148660 - }, - { - "epoch": 1.3142912710620767, - "grad_norm": 2.8430674076080322, - "learning_rate": 2.8095145482298725e-05, - "loss": 0.6238, - "step": 148670 - }, - { - "epoch": 1.314379674322389, - "grad_norm": 2.3491432666778564, - "learning_rate": 2.809367209462685e-05, - "loss": 0.6024, - "step": 148680 - }, - { - "epoch": 1.3144680775827013, - "grad_norm": 12.166789054870605, - "learning_rate": 2.8092198706954982e-05, - "loss": 0.6587, - "step": 148690 - }, - { - "epoch": 1.3145564808430135, - "grad_norm": 2.9210777282714844, - "learning_rate": 2.8090725319283113e-05, - "loss": 0.533, - "step": 148700 - }, - { - "epoch": 1.3146448841033258, - "grad_norm": 5.058513164520264, - "learning_rate": 2.808925193161124e-05, - "loss": 0.5783, - "step": 148710 - }, - { - "epoch": 1.314733287363638, - "grad_norm": 4.492527008056641, - "learning_rate": 2.808777854393937e-05, - "loss": 0.5599, - "step": 148720 - }, - { - "epoch": 1.3148216906239503, - "grad_norm": 2.55505633354187, - "learning_rate": 2.8086305156267502e-05, - "loss": 0.6899, - "step": 148730 - }, - { - "epoch": 1.3149100938842624, - "grad_norm": 4.788483142852783, - "learning_rate": 2.8084831768595627e-05, - "loss": 0.5483, - "step": 148740 - }, - { - "epoch": 1.3149984971445747, - "grad_norm": 4.912303447723389, - "learning_rate": 2.808335838092376e-05, - "loss": 0.6141, - "step": 148750 - }, - { - "epoch": 1.3150869004048868, - "grad_norm": 1.9395875930786133, - "learning_rate": 2.8081884993251884e-05, - "loss": 0.5256, - "step": 148760 - }, - { - "epoch": 1.3151753036651992, - "grad_norm": 0.8670976758003235, - "learning_rate": 2.8080411605580015e-05, - "loss": 0.6095, - "step": 148770 - }, - { - "epoch": 1.3152637069255113, - "grad_norm": 13.635018348693848, - "learning_rate": 2.8078938217908147e-05, - "loss": 0.6185, - "step": 148780 - }, - { - "epoch": 1.3153521101858237, - "grad_norm": 1.4558939933776855, - "learning_rate": 2.8077464830236272e-05, - "loss": 0.6478, - "step": 148790 - }, - { - "epoch": 1.315440513446136, - "grad_norm": 1.5235179662704468, - "learning_rate": 2.8075991442564404e-05, - "loss": 0.5987, - "step": 148800 - }, - { - "epoch": 1.3155289167064481, - "grad_norm": 2.690790891647339, - "learning_rate": 2.8074518054892535e-05, - "loss": 0.5572, - "step": 148810 - }, - { - "epoch": 1.3156173199667605, - "grad_norm": 8.002449989318848, - "learning_rate": 2.807304466722066e-05, - "loss": 0.6053, - "step": 148820 - }, - { - "epoch": 1.3157057232270726, - "grad_norm": 2.305943250656128, - "learning_rate": 2.8071571279548792e-05, - "loss": 0.6247, - "step": 148830 - }, - { - "epoch": 1.315794126487385, - "grad_norm": 2.879995107650757, - "learning_rate": 2.8070097891876924e-05, - "loss": 0.673, - "step": 148840 - }, - { - "epoch": 1.315882529747697, - "grad_norm": 1.278105616569519, - "learning_rate": 2.806862450420505e-05, - "loss": 0.4747, - "step": 148850 - }, - { - "epoch": 1.3159709330080094, - "grad_norm": 5.807000160217285, - "learning_rate": 2.806715111653318e-05, - "loss": 0.4995, - "step": 148860 - }, - { - "epoch": 1.3160593362683215, - "grad_norm": 1.4099643230438232, - "learning_rate": 2.8065677728861306e-05, - "loss": 0.5967, - "step": 148870 - }, - { - "epoch": 1.3161477395286338, - "grad_norm": 2.9102284908294678, - "learning_rate": 2.8064204341189437e-05, - "loss": 0.64, - "step": 148880 - }, - { - "epoch": 1.316236142788946, - "grad_norm": 1.4413174390792847, - "learning_rate": 2.806273095351757e-05, - "loss": 0.5785, - "step": 148890 - }, - { - "epoch": 1.3163245460492583, - "grad_norm": 4.8757123947143555, - "learning_rate": 2.8061257565845694e-05, - "loss": 0.6616, - "step": 148900 - }, - { - "epoch": 1.3164129493095706, - "grad_norm": 4.079837322235107, - "learning_rate": 2.8059784178173826e-05, - "loss": 0.6722, - "step": 148910 - }, - { - "epoch": 1.3165013525698828, - "grad_norm": 9.008342742919922, - "learning_rate": 2.8058310790501958e-05, - "loss": 0.5445, - "step": 148920 - }, - { - "epoch": 1.3165897558301949, - "grad_norm": 5.8628740310668945, - "learning_rate": 2.8056837402830083e-05, - "loss": 0.6926, - "step": 148930 - }, - { - "epoch": 1.3166781590905072, - "grad_norm": 3.9124386310577393, - "learning_rate": 2.8055364015158214e-05, - "loss": 0.5543, - "step": 148940 - }, - { - "epoch": 1.3167665623508196, - "grad_norm": 2.377822160720825, - "learning_rate": 2.8053890627486346e-05, - "loss": 0.6096, - "step": 148950 - }, - { - "epoch": 1.3168549656111317, - "grad_norm": 2.1724817752838135, - "learning_rate": 2.805241723981447e-05, - "loss": 0.7722, - "step": 148960 - }, - { - "epoch": 1.316943368871444, - "grad_norm": 1.0120038986206055, - "learning_rate": 2.8050943852142603e-05, - "loss": 0.4392, - "step": 148970 - }, - { - "epoch": 1.3170317721317562, - "grad_norm": 10.115762710571289, - "learning_rate": 2.8049470464470728e-05, - "loss": 0.5273, - "step": 148980 - }, - { - "epoch": 1.3171201753920685, - "grad_norm": 7.143669128417969, - "learning_rate": 2.804799707679886e-05, - "loss": 0.4651, - "step": 148990 - }, - { - "epoch": 1.3172085786523806, - "grad_norm": 0.8453541994094849, - "learning_rate": 2.804652368912699e-05, - "loss": 0.6832, - "step": 149000 - }, - { - "epoch": 1.317296981912693, - "grad_norm": 14.636137008666992, - "learning_rate": 2.8045050301455116e-05, - "loss": 0.6548, - "step": 149010 - }, - { - "epoch": 1.3173853851730053, - "grad_norm": 4.40684700012207, - "learning_rate": 2.8043576913783248e-05, - "loss": 0.5335, - "step": 149020 - }, - { - "epoch": 1.3174737884333174, - "grad_norm": 2.7863948345184326, - "learning_rate": 2.804210352611138e-05, - "loss": 0.5586, - "step": 149030 - }, - { - "epoch": 1.3175621916936295, - "grad_norm": 1.8299301862716675, - "learning_rate": 2.8040630138439505e-05, - "loss": 0.6923, - "step": 149040 - }, - { - "epoch": 1.3176505949539419, - "grad_norm": 2.4196178913116455, - "learning_rate": 2.8039156750767636e-05, - "loss": 0.4718, - "step": 149050 - }, - { - "epoch": 1.3177389982142542, - "grad_norm": 2.169053554534912, - "learning_rate": 2.8037683363095768e-05, - "loss": 0.7069, - "step": 149060 - }, - { - "epoch": 1.3178274014745663, - "grad_norm": 12.398027420043945, - "learning_rate": 2.8036209975423893e-05, - "loss": 0.6106, - "step": 149070 - }, - { - "epoch": 1.3179158047348787, - "grad_norm": 7.882556915283203, - "learning_rate": 2.8034736587752025e-05, - "loss": 0.6408, - "step": 149080 - }, - { - "epoch": 1.3180042079951908, - "grad_norm": 1.3828694820404053, - "learning_rate": 2.803326320008015e-05, - "loss": 0.5569, - "step": 149090 - }, - { - "epoch": 1.3180926112555031, - "grad_norm": 3.0277352333068848, - "learning_rate": 2.803178981240828e-05, - "loss": 0.5917, - "step": 149100 - }, - { - "epoch": 1.3181810145158153, - "grad_norm": 3.305237293243408, - "learning_rate": 2.8030316424736413e-05, - "loss": 0.6349, - "step": 149110 - }, - { - "epoch": 1.3182694177761276, - "grad_norm": 1.0532431602478027, - "learning_rate": 2.8028843037064538e-05, - "loss": 0.5667, - "step": 149120 - }, - { - "epoch": 1.31835782103644, - "grad_norm": 0.9725874662399292, - "learning_rate": 2.802736964939267e-05, - "loss": 0.5095, - "step": 149130 - }, - { - "epoch": 1.318446224296752, - "grad_norm": 1.2367267608642578, - "learning_rate": 2.80258962617208e-05, - "loss": 0.6494, - "step": 149140 - }, - { - "epoch": 1.3185346275570642, - "grad_norm": 2.5237839221954346, - "learning_rate": 2.8024422874048927e-05, - "loss": 0.6039, - "step": 149150 - }, - { - "epoch": 1.3186230308173765, - "grad_norm": 1.1270629167556763, - "learning_rate": 2.802294948637706e-05, - "loss": 0.7042, - "step": 149160 - }, - { - "epoch": 1.3187114340776889, - "grad_norm": 3.558521032333374, - "learning_rate": 2.802147609870519e-05, - "loss": 0.5468, - "step": 149170 - }, - { - "epoch": 1.318799837338001, - "grad_norm": 0.6784349083900452, - "learning_rate": 2.8020002711033315e-05, - "loss": 0.4122, - "step": 149180 - }, - { - "epoch": 1.3188882405983133, - "grad_norm": 4.909865379333496, - "learning_rate": 2.8018529323361447e-05, - "loss": 0.7655, - "step": 149190 - }, - { - "epoch": 1.3189766438586255, - "grad_norm": 1.2544851303100586, - "learning_rate": 2.801705593568958e-05, - "loss": 0.6183, - "step": 149200 - }, - { - "epoch": 1.3190650471189378, - "grad_norm": 2.1723010540008545, - "learning_rate": 2.8015582548017704e-05, - "loss": 0.5113, - "step": 149210 - }, - { - "epoch": 1.31915345037925, - "grad_norm": 2.934835195541382, - "learning_rate": 2.8014109160345835e-05, - "loss": 0.4953, - "step": 149220 - }, - { - "epoch": 1.3192418536395623, - "grad_norm": 3.200718879699707, - "learning_rate": 2.801263577267396e-05, - "loss": 0.5348, - "step": 149230 - }, - { - "epoch": 1.3193302568998746, - "grad_norm": 2.423452615737915, - "learning_rate": 2.8011162385002092e-05, - "loss": 0.6038, - "step": 149240 - }, - { - "epoch": 1.3194186601601867, - "grad_norm": 1.3244715929031372, - "learning_rate": 2.8009688997330224e-05, - "loss": 0.5236, - "step": 149250 - }, - { - "epoch": 1.3195070634204988, - "grad_norm": 2.3296327590942383, - "learning_rate": 2.800821560965835e-05, - "loss": 0.7165, - "step": 149260 - }, - { - "epoch": 1.3195954666808112, - "grad_norm": 0.9985233545303345, - "learning_rate": 2.800674222198648e-05, - "loss": 0.5384, - "step": 149270 - }, - { - "epoch": 1.3196838699411235, - "grad_norm": 2.6017701625823975, - "learning_rate": 2.8005268834314612e-05, - "loss": 0.5466, - "step": 149280 - }, - { - "epoch": 1.3197722732014356, - "grad_norm": 1.6243170499801636, - "learning_rate": 2.8003795446642737e-05, - "loss": 0.638, - "step": 149290 - }, - { - "epoch": 1.319860676461748, - "grad_norm": 3.723896026611328, - "learning_rate": 2.800232205897087e-05, - "loss": 0.6559, - "step": 149300 - }, - { - "epoch": 1.31994907972206, - "grad_norm": 0.9679158329963684, - "learning_rate": 2.8000848671299e-05, - "loss": 0.5498, - "step": 149310 - }, - { - "epoch": 1.3200374829823724, - "grad_norm": 8.074565887451172, - "learning_rate": 2.7999375283627126e-05, - "loss": 0.628, - "step": 149320 - }, - { - "epoch": 1.3201258862426846, - "grad_norm": 3.1651248931884766, - "learning_rate": 2.7997901895955257e-05, - "loss": 0.5715, - "step": 149330 - }, - { - "epoch": 1.320214289502997, - "grad_norm": 1.804006814956665, - "learning_rate": 2.7996428508283386e-05, - "loss": 0.4921, - "step": 149340 - }, - { - "epoch": 1.3203026927633092, - "grad_norm": 4.262528896331787, - "learning_rate": 2.7994955120611514e-05, - "loss": 0.5277, - "step": 149350 - }, - { - "epoch": 1.3203910960236214, - "grad_norm": 0.8347508311271667, - "learning_rate": 2.7993481732939646e-05, - "loss": 0.5887, - "step": 149360 - }, - { - "epoch": 1.3204794992839335, - "grad_norm": 2.1242294311523438, - "learning_rate": 2.7992008345267774e-05, - "loss": 0.7227, - "step": 149370 - }, - { - "epoch": 1.3205679025442458, - "grad_norm": 5.066576957702637, - "learning_rate": 2.7990534957595902e-05, - "loss": 0.6711, - "step": 149380 - }, - { - "epoch": 1.3206563058045582, - "grad_norm": 2.174452304840088, - "learning_rate": 2.7989061569924034e-05, - "loss": 0.6392, - "step": 149390 - }, - { - "epoch": 1.3207447090648703, - "grad_norm": 13.127554893493652, - "learning_rate": 2.7987588182252163e-05, - "loss": 0.5315, - "step": 149400 - }, - { - "epoch": 1.3208331123251826, - "grad_norm": 0.6841283440589905, - "learning_rate": 2.798611479458029e-05, - "loss": 0.5714, - "step": 149410 - }, - { - "epoch": 1.3209215155854948, - "grad_norm": 4.091153621673584, - "learning_rate": 2.7984641406908423e-05, - "loss": 0.54, - "step": 149420 - }, - { - "epoch": 1.321009918845807, - "grad_norm": 5.6124091148376465, - "learning_rate": 2.798316801923655e-05, - "loss": 0.6151, - "step": 149430 - }, - { - "epoch": 1.3210983221061192, - "grad_norm": 12.54110336303711, - "learning_rate": 2.798169463156468e-05, - "loss": 0.6296, - "step": 149440 - }, - { - "epoch": 1.3211867253664316, - "grad_norm": 3.029468536376953, - "learning_rate": 2.7980221243892808e-05, - "loss": 0.601, - "step": 149450 - }, - { - "epoch": 1.3212751286267437, - "grad_norm": 2.3717033863067627, - "learning_rate": 2.797874785622094e-05, - "loss": 0.6443, - "step": 149460 - }, - { - "epoch": 1.321363531887056, - "grad_norm": 2.2255585193634033, - "learning_rate": 2.7977274468549068e-05, - "loss": 0.6611, - "step": 149470 - }, - { - "epoch": 1.3214519351473681, - "grad_norm": 2.8330562114715576, - "learning_rate": 2.7975801080877196e-05, - "loss": 0.5535, - "step": 149480 - }, - { - "epoch": 1.3215403384076805, - "grad_norm": 2.395223617553711, - "learning_rate": 2.7974327693205328e-05, - "loss": 0.5336, - "step": 149490 - }, - { - "epoch": 1.3216287416679928, - "grad_norm": 6.6551361083984375, - "learning_rate": 2.7972854305533456e-05, - "loss": 0.6118, - "step": 149500 - }, - { - "epoch": 1.321717144928305, - "grad_norm": 1.2373433113098145, - "learning_rate": 2.7971380917861585e-05, - "loss": 0.4703, - "step": 149510 - }, - { - "epoch": 1.321805548188617, - "grad_norm": 4.395318508148193, - "learning_rate": 2.7969907530189716e-05, - "loss": 0.6532, - "step": 149520 - }, - { - "epoch": 1.3218939514489294, - "grad_norm": 0.9730364680290222, - "learning_rate": 2.7968434142517845e-05, - "loss": 0.5362, - "step": 149530 - }, - { - "epoch": 1.3219823547092417, - "grad_norm": 3.5485353469848633, - "learning_rate": 2.7966960754845973e-05, - "loss": 0.7579, - "step": 149540 - }, - { - "epoch": 1.3220707579695539, - "grad_norm": 2.8976399898529053, - "learning_rate": 2.7965487367174105e-05, - "loss": 0.7407, - "step": 149550 - }, - { - "epoch": 1.3221591612298662, - "grad_norm": 1.7230486869812012, - "learning_rate": 2.796401397950223e-05, - "loss": 0.6945, - "step": 149560 - }, - { - "epoch": 1.3222475644901783, - "grad_norm": 4.44954776763916, - "learning_rate": 2.796254059183036e-05, - "loss": 0.5641, - "step": 149570 - }, - { - "epoch": 1.3223359677504907, - "grad_norm": 5.69603967666626, - "learning_rate": 2.7961067204158493e-05, - "loss": 0.6739, - "step": 149580 - }, - { - "epoch": 1.3224243710108028, - "grad_norm": 4.622913837432861, - "learning_rate": 2.7959593816486618e-05, - "loss": 0.5676, - "step": 149590 - }, - { - "epoch": 1.3225127742711151, - "grad_norm": 3.3708529472351074, - "learning_rate": 2.795812042881475e-05, - "loss": 0.5343, - "step": 149600 - }, - { - "epoch": 1.3226011775314275, - "grad_norm": 3.0041346549987793, - "learning_rate": 2.7956647041142882e-05, - "loss": 0.6227, - "step": 149610 - }, - { - "epoch": 1.3226895807917396, - "grad_norm": 1.8608965873718262, - "learning_rate": 2.7955173653471007e-05, - "loss": 0.5901, - "step": 149620 - }, - { - "epoch": 1.3227779840520517, - "grad_norm": 1.8206435441970825, - "learning_rate": 2.795370026579914e-05, - "loss": 0.5853, - "step": 149630 - }, - { - "epoch": 1.322866387312364, - "grad_norm": 1.5332399606704712, - "learning_rate": 2.795222687812727e-05, - "loss": 0.7259, - "step": 149640 - }, - { - "epoch": 1.3229547905726764, - "grad_norm": 1.5915025472640991, - "learning_rate": 2.7950753490455395e-05, - "loss": 0.6571, - "step": 149650 - }, - { - "epoch": 1.3230431938329885, - "grad_norm": 1.6561287641525269, - "learning_rate": 2.7949280102783527e-05, - "loss": 0.6351, - "step": 149660 - }, - { - "epoch": 1.3231315970933009, - "grad_norm": 11.13310718536377, - "learning_rate": 2.794780671511166e-05, - "loss": 0.5854, - "step": 149670 - }, - { - "epoch": 1.323220000353613, - "grad_norm": 2.8928637504577637, - "learning_rate": 2.7946333327439784e-05, - "loss": 0.4542, - "step": 149680 - }, - { - "epoch": 1.3233084036139253, - "grad_norm": 1.6942949295043945, - "learning_rate": 2.7944859939767915e-05, - "loss": 0.6247, - "step": 149690 - }, - { - "epoch": 1.3233968068742374, - "grad_norm": 1.7843235731124878, - "learning_rate": 2.794338655209604e-05, - "loss": 0.6055, - "step": 149700 - }, - { - "epoch": 1.3234852101345498, - "grad_norm": 2.2763171195983887, - "learning_rate": 2.7941913164424172e-05, - "loss": 0.6487, - "step": 149710 - }, - { - "epoch": 1.3235736133948621, - "grad_norm": 1.9591131210327148, - "learning_rate": 2.7940439776752304e-05, - "loss": 0.5343, - "step": 149720 - }, - { - "epoch": 1.3236620166551742, - "grad_norm": 1.1911338567733765, - "learning_rate": 2.793896638908043e-05, - "loss": 0.621, - "step": 149730 - }, - { - "epoch": 1.3237504199154864, - "grad_norm": 1.1273411512374878, - "learning_rate": 2.793749300140856e-05, - "loss": 0.611, - "step": 149740 - }, - { - "epoch": 1.3238388231757987, - "grad_norm": 2.019596815109253, - "learning_rate": 2.7936019613736692e-05, - "loss": 0.5633, - "step": 149750 - }, - { - "epoch": 1.323927226436111, - "grad_norm": 2.4595444202423096, - "learning_rate": 2.7934546226064817e-05, - "loss": 0.6861, - "step": 149760 - }, - { - "epoch": 1.3240156296964232, - "grad_norm": 1.9519199132919312, - "learning_rate": 2.793307283839295e-05, - "loss": 0.5056, - "step": 149770 - }, - { - "epoch": 1.3241040329567355, - "grad_norm": 1.9743772745132446, - "learning_rate": 2.793159945072108e-05, - "loss": 0.7267, - "step": 149780 - }, - { - "epoch": 1.3241924362170476, - "grad_norm": 4.21689510345459, - "learning_rate": 2.7930126063049206e-05, - "loss": 0.5181, - "step": 149790 - }, - { - "epoch": 1.32428083947736, - "grad_norm": 1.7165127992630005, - "learning_rate": 2.7928652675377337e-05, - "loss": 0.67, - "step": 149800 - }, - { - "epoch": 1.324369242737672, - "grad_norm": 2.287660837173462, - "learning_rate": 2.7927179287705462e-05, - "loss": 0.5722, - "step": 149810 - }, - { - "epoch": 1.3244576459979844, - "grad_norm": 1.8989897966384888, - "learning_rate": 2.7925705900033594e-05, - "loss": 0.5072, - "step": 149820 - }, - { - "epoch": 1.3245460492582968, - "grad_norm": 2.7715299129486084, - "learning_rate": 2.7924232512361726e-05, - "loss": 0.6686, - "step": 149830 - }, - { - "epoch": 1.324634452518609, - "grad_norm": 1.2070786952972412, - "learning_rate": 2.792275912468985e-05, - "loss": 0.5873, - "step": 149840 - }, - { - "epoch": 1.324722855778921, - "grad_norm": 2.238295316696167, - "learning_rate": 2.7921285737017983e-05, - "loss": 0.6277, - "step": 149850 - }, - { - "epoch": 1.3248112590392334, - "grad_norm": 3.2079405784606934, - "learning_rate": 2.7919812349346114e-05, - "loss": 0.6083, - "step": 149860 - }, - { - "epoch": 1.3248996622995457, - "grad_norm": 1.5579689741134644, - "learning_rate": 2.791833896167424e-05, - "loss": 0.5147, - "step": 149870 - }, - { - "epoch": 1.3249880655598578, - "grad_norm": 1.6787261962890625, - "learning_rate": 2.791686557400237e-05, - "loss": 0.678, - "step": 149880 - }, - { - "epoch": 1.3250764688201702, - "grad_norm": 1.855412483215332, - "learning_rate": 2.7915392186330503e-05, - "loss": 0.5387, - "step": 149890 - }, - { - "epoch": 1.3251648720804823, - "grad_norm": 2.181928873062134, - "learning_rate": 2.7913918798658628e-05, - "loss": 0.5415, - "step": 149900 - }, - { - "epoch": 1.3252532753407946, - "grad_norm": 2.0913829803466797, - "learning_rate": 2.791244541098676e-05, - "loss": 0.5816, - "step": 149910 - }, - { - "epoch": 1.3253416786011067, - "grad_norm": 3.86319637298584, - "learning_rate": 2.7910972023314884e-05, - "loss": 0.5917, - "step": 149920 - }, - { - "epoch": 1.325430081861419, - "grad_norm": 3.071532964706421, - "learning_rate": 2.7909498635643016e-05, - "loss": 0.4953, - "step": 149930 - }, - { - "epoch": 1.3255184851217314, - "grad_norm": 1.556351900100708, - "learning_rate": 2.7908025247971148e-05, - "loss": 0.6378, - "step": 149940 - }, - { - "epoch": 1.3256068883820435, - "grad_norm": 7.783919811248779, - "learning_rate": 2.7906551860299273e-05, - "loss": 0.7176, - "step": 149950 - }, - { - "epoch": 1.3256952916423557, - "grad_norm": 3.308544635772705, - "learning_rate": 2.7905078472627405e-05, - "loss": 0.6186, - "step": 149960 - }, - { - "epoch": 1.325783694902668, - "grad_norm": 9.659269332885742, - "learning_rate": 2.7903605084955536e-05, - "loss": 0.6223, - "step": 149970 - }, - { - "epoch": 1.3258720981629804, - "grad_norm": 1.5997798442840576, - "learning_rate": 2.790213169728366e-05, - "loss": 0.6082, - "step": 149980 - }, - { - "epoch": 1.3259605014232925, - "grad_norm": 2.495012044906616, - "learning_rate": 2.7900658309611793e-05, - "loss": 0.6557, - "step": 149990 - }, - { - "epoch": 1.3260489046836048, - "grad_norm": 6.634875774383545, - "learning_rate": 2.7899184921939925e-05, - "loss": 0.6724, - "step": 150000 - }, - { - "epoch": 1.326137307943917, - "grad_norm": 1.9638028144836426, - "learning_rate": 2.789771153426805e-05, - "loss": 0.6969, - "step": 150010 - }, - { - "epoch": 1.3262257112042293, - "grad_norm": 1.1258223056793213, - "learning_rate": 2.789623814659618e-05, - "loss": 0.5816, - "step": 150020 - }, - { - "epoch": 1.3263141144645414, - "grad_norm": 13.562403678894043, - "learning_rate": 2.7894764758924306e-05, - "loss": 0.4861, - "step": 150030 - }, - { - "epoch": 1.3264025177248537, - "grad_norm": 4.885910511016846, - "learning_rate": 2.7893291371252438e-05, - "loss": 0.6611, - "step": 150040 - }, - { - "epoch": 1.3264909209851659, - "grad_norm": 10.201705932617188, - "learning_rate": 2.789181798358057e-05, - "loss": 0.6298, - "step": 150050 - }, - { - "epoch": 1.3265793242454782, - "grad_norm": 20.267230987548828, - "learning_rate": 2.7890344595908695e-05, - "loss": 0.6347, - "step": 150060 - }, - { - "epoch": 1.3266677275057903, - "grad_norm": 2.118298292160034, - "learning_rate": 2.7888871208236827e-05, - "loss": 0.6034, - "step": 150070 - }, - { - "epoch": 1.3267561307661027, - "grad_norm": 1.4050759077072144, - "learning_rate": 2.788739782056496e-05, - "loss": 0.5572, - "step": 150080 - }, - { - "epoch": 1.326844534026415, - "grad_norm": 14.408783912658691, - "learning_rate": 2.7885924432893083e-05, - "loss": 0.7032, - "step": 150090 - }, - { - "epoch": 1.3269329372867271, - "grad_norm": 4.709105014801025, - "learning_rate": 2.7884451045221215e-05, - "loss": 0.6664, - "step": 150100 - }, - { - "epoch": 1.3270213405470392, - "grad_norm": 2.413173198699951, - "learning_rate": 2.7882977657549347e-05, - "loss": 0.685, - "step": 150110 - }, - { - "epoch": 1.3271097438073516, - "grad_norm": 2.0782058238983154, - "learning_rate": 2.7881504269877472e-05, - "loss": 0.5607, - "step": 150120 - }, - { - "epoch": 1.327198147067664, - "grad_norm": 3.12268328666687, - "learning_rate": 2.7880030882205604e-05, - "loss": 0.6153, - "step": 150130 - }, - { - "epoch": 1.327286550327976, - "grad_norm": 0.6943034529685974, - "learning_rate": 2.7878557494533735e-05, - "loss": 0.4618, - "step": 150140 - }, - { - "epoch": 1.3273749535882884, - "grad_norm": 3.122631072998047, - "learning_rate": 2.787708410686186e-05, - "loss": 0.6025, - "step": 150150 - }, - { - "epoch": 1.3274633568486005, - "grad_norm": 4.157289981842041, - "learning_rate": 2.7875610719189992e-05, - "loss": 0.503, - "step": 150160 - }, - { - "epoch": 1.3275517601089128, - "grad_norm": 0.9801242351531982, - "learning_rate": 2.7874137331518117e-05, - "loss": 0.7158, - "step": 150170 - }, - { - "epoch": 1.327640163369225, - "grad_norm": 4.750873565673828, - "learning_rate": 2.787266394384625e-05, - "loss": 0.5517, - "step": 150180 - }, - { - "epoch": 1.3277285666295373, - "grad_norm": 2.4548332691192627, - "learning_rate": 2.787119055617438e-05, - "loss": 0.6068, - "step": 150190 - }, - { - "epoch": 1.3278169698898497, - "grad_norm": 1.4304416179656982, - "learning_rate": 2.7869717168502505e-05, - "loss": 0.701, - "step": 150200 - }, - { - "epoch": 1.3279053731501618, - "grad_norm": 3.8410096168518066, - "learning_rate": 2.7868243780830637e-05, - "loss": 0.502, - "step": 150210 - }, - { - "epoch": 1.327993776410474, - "grad_norm": 1.530448317527771, - "learning_rate": 2.786677039315877e-05, - "loss": 0.5823, - "step": 150220 - }, - { - "epoch": 1.3280821796707862, - "grad_norm": 1.39223051071167, - "learning_rate": 2.7865297005486894e-05, - "loss": 0.5787, - "step": 150230 - }, - { - "epoch": 1.3281705829310986, - "grad_norm": 2.7651476860046387, - "learning_rate": 2.7863823617815026e-05, - "loss": 0.667, - "step": 150240 - }, - { - "epoch": 1.3282589861914107, - "grad_norm": 2.3552401065826416, - "learning_rate": 2.7862350230143157e-05, - "loss": 0.5412, - "step": 150250 - }, - { - "epoch": 1.328347389451723, - "grad_norm": 1.4385193586349487, - "learning_rate": 2.7860876842471286e-05, - "loss": 0.6674, - "step": 150260 - }, - { - "epoch": 1.3284357927120352, - "grad_norm": 1.6505261659622192, - "learning_rate": 2.7859403454799414e-05, - "loss": 0.5412, - "step": 150270 - }, - { - "epoch": 1.3285241959723475, - "grad_norm": 1.7234666347503662, - "learning_rate": 2.7857930067127542e-05, - "loss": 0.6793, - "step": 150280 - }, - { - "epoch": 1.3286125992326596, - "grad_norm": 1.9166803359985352, - "learning_rate": 2.7856456679455674e-05, - "loss": 0.4958, - "step": 150290 - }, - { - "epoch": 1.328701002492972, - "grad_norm": 2.502293348312378, - "learning_rate": 2.7854983291783803e-05, - "loss": 0.5464, - "step": 150300 - }, - { - "epoch": 1.3287894057532843, - "grad_norm": 4.724217414855957, - "learning_rate": 2.785350990411193e-05, - "loss": 0.7003, - "step": 150310 - }, - { - "epoch": 1.3288778090135964, - "grad_norm": 2.1562182903289795, - "learning_rate": 2.7852036516440063e-05, - "loss": 0.5643, - "step": 150320 - }, - { - "epoch": 1.3289662122739085, - "grad_norm": 2.982635736465454, - "learning_rate": 2.785056312876819e-05, - "loss": 0.767, - "step": 150330 - }, - { - "epoch": 1.3290546155342209, - "grad_norm": 1.9141908884048462, - "learning_rate": 2.784908974109632e-05, - "loss": 0.604, - "step": 150340 - }, - { - "epoch": 1.3291430187945332, - "grad_norm": 4.531190872192383, - "learning_rate": 2.784761635342445e-05, - "loss": 0.7938, - "step": 150350 - }, - { - "epoch": 1.3292314220548453, - "grad_norm": 3.92395281791687, - "learning_rate": 2.784614296575258e-05, - "loss": 0.6323, - "step": 150360 - }, - { - "epoch": 1.3293198253151577, - "grad_norm": 1.299192190170288, - "learning_rate": 2.7844669578080708e-05, - "loss": 0.6143, - "step": 150370 - }, - { - "epoch": 1.3294082285754698, - "grad_norm": 7.759983062744141, - "learning_rate": 2.784319619040884e-05, - "loss": 0.6597, - "step": 150380 - }, - { - "epoch": 1.3294966318357821, - "grad_norm": 4.2938361167907715, - "learning_rate": 2.7841722802736965e-05, - "loss": 0.6369, - "step": 150390 - }, - { - "epoch": 1.3295850350960943, - "grad_norm": 27.397857666015625, - "learning_rate": 2.7840249415065096e-05, - "loss": 0.6365, - "step": 150400 - }, - { - "epoch": 1.3296734383564066, - "grad_norm": 1.1241165399551392, - "learning_rate": 2.7838776027393228e-05, - "loss": 0.5248, - "step": 150410 - }, - { - "epoch": 1.329761841616719, - "grad_norm": 5.163040637969971, - "learning_rate": 2.7837302639721353e-05, - "loss": 0.5497, - "step": 150420 - }, - { - "epoch": 1.329850244877031, - "grad_norm": 2.237762928009033, - "learning_rate": 2.7835829252049485e-05, - "loss": 0.6564, - "step": 150430 - }, - { - "epoch": 1.3299386481373432, - "grad_norm": 5.071065425872803, - "learning_rate": 2.7834355864377616e-05, - "loss": 0.5274, - "step": 150440 - }, - { - "epoch": 1.3300270513976555, - "grad_norm": 5.093015193939209, - "learning_rate": 2.783288247670574e-05, - "loss": 0.6916, - "step": 150450 - }, - { - "epoch": 1.3301154546579679, - "grad_norm": 1.2640419006347656, - "learning_rate": 2.7831409089033873e-05, - "loss": 0.6796, - "step": 150460 - }, - { - "epoch": 1.33020385791828, - "grad_norm": 24.784549713134766, - "learning_rate": 2.7829935701362005e-05, - "loss": 0.7552, - "step": 150470 - }, - { - "epoch": 1.3302922611785923, - "grad_norm": 2.175790548324585, - "learning_rate": 2.782846231369013e-05, - "loss": 0.5447, - "step": 150480 - }, - { - "epoch": 1.3303806644389045, - "grad_norm": 10.204320907592773, - "learning_rate": 2.782698892601826e-05, - "loss": 0.5061, - "step": 150490 - }, - { - "epoch": 1.3304690676992168, - "grad_norm": 1.5230896472930908, - "learning_rate": 2.7825515538346387e-05, - "loss": 0.6622, - "step": 150500 - }, - { - "epoch": 1.330557470959529, - "grad_norm": 2.734492063522339, - "learning_rate": 2.7824042150674518e-05, - "loss": 0.6224, - "step": 150510 - }, - { - "epoch": 1.3306458742198413, - "grad_norm": 3.1669328212738037, - "learning_rate": 2.782256876300265e-05, - "loss": 0.8145, - "step": 150520 - }, - { - "epoch": 1.3307342774801536, - "grad_norm": 1.6175446510314941, - "learning_rate": 2.7821095375330775e-05, - "loss": 0.4171, - "step": 150530 - }, - { - "epoch": 1.3308226807404657, - "grad_norm": 2.3916404247283936, - "learning_rate": 2.7819621987658907e-05, - "loss": 0.6381, - "step": 150540 - }, - { - "epoch": 1.3309110840007778, - "grad_norm": 6.5817108154296875, - "learning_rate": 2.781814859998704e-05, - "loss": 0.7172, - "step": 150550 - }, - { - "epoch": 1.3309994872610902, - "grad_norm": 2.919212579727173, - "learning_rate": 2.7816675212315163e-05, - "loss": 0.5731, - "step": 150560 - }, - { - "epoch": 1.3310878905214025, - "grad_norm": 2.1812314987182617, - "learning_rate": 2.7815201824643295e-05, - "loss": 0.6087, - "step": 150570 - }, - { - "epoch": 1.3311762937817146, - "grad_norm": 2.6658101081848145, - "learning_rate": 2.7813728436971427e-05, - "loss": 0.5804, - "step": 150580 - }, - { - "epoch": 1.331264697042027, - "grad_norm": 2.9856085777282715, - "learning_rate": 2.7812255049299552e-05, - "loss": 0.6247, - "step": 150590 - }, - { - "epoch": 1.331353100302339, - "grad_norm": 1.3335188627243042, - "learning_rate": 2.7810781661627684e-05, - "loss": 0.5555, - "step": 150600 - }, - { - "epoch": 1.3314415035626515, - "grad_norm": 3.168314218521118, - "learning_rate": 2.7809308273955815e-05, - "loss": 0.5928, - "step": 150610 - }, - { - "epoch": 1.3315299068229636, - "grad_norm": 3.5253214836120605, - "learning_rate": 2.780783488628394e-05, - "loss": 0.8322, - "step": 150620 - }, - { - "epoch": 1.331618310083276, - "grad_norm": 1.6161246299743652, - "learning_rate": 2.7806361498612072e-05, - "loss": 0.6468, - "step": 150630 - }, - { - "epoch": 1.331706713343588, - "grad_norm": 5.83316707611084, - "learning_rate": 2.7804888110940197e-05, - "loss": 0.6055, - "step": 150640 - }, - { - "epoch": 1.3317951166039004, - "grad_norm": 2.1565229892730713, - "learning_rate": 2.780341472326833e-05, - "loss": 0.6758, - "step": 150650 - }, - { - "epoch": 1.3318835198642125, - "grad_norm": 1.9444721937179565, - "learning_rate": 2.780194133559646e-05, - "loss": 0.6554, - "step": 150660 - }, - { - "epoch": 1.3319719231245248, - "grad_norm": 1.1099681854248047, - "learning_rate": 2.7800467947924586e-05, - "loss": 0.6158, - "step": 150670 - }, - { - "epoch": 1.3320603263848372, - "grad_norm": 2.171590566635132, - "learning_rate": 2.7798994560252717e-05, - "loss": 0.58, - "step": 150680 - }, - { - "epoch": 1.3321487296451493, - "grad_norm": 6.700047492980957, - "learning_rate": 2.779752117258085e-05, - "loss": 0.7676, - "step": 150690 - }, - { - "epoch": 1.3322371329054614, - "grad_norm": 2.354696750640869, - "learning_rate": 2.7796047784908974e-05, - "loss": 0.6369, - "step": 150700 - }, - { - "epoch": 1.3323255361657738, - "grad_norm": 3.3658783435821533, - "learning_rate": 2.7794574397237106e-05, - "loss": 0.4878, - "step": 150710 - }, - { - "epoch": 1.332413939426086, - "grad_norm": 2.440730571746826, - "learning_rate": 2.7793101009565237e-05, - "loss": 0.5149, - "step": 150720 - }, - { - "epoch": 1.3325023426863982, - "grad_norm": 3.293100595474243, - "learning_rate": 2.7791627621893362e-05, - "loss": 0.6807, - "step": 150730 - }, - { - "epoch": 1.3325907459467106, - "grad_norm": 1.672044277191162, - "learning_rate": 2.7790154234221494e-05, - "loss": 0.6752, - "step": 150740 - }, - { - "epoch": 1.3326791492070227, - "grad_norm": 1.8963091373443604, - "learning_rate": 2.778868084654962e-05, - "loss": 0.5227, - "step": 150750 - }, - { - "epoch": 1.332767552467335, - "grad_norm": 2.4444239139556885, - "learning_rate": 2.778720745887775e-05, - "loss": 0.5804, - "step": 150760 - }, - { - "epoch": 1.3328559557276471, - "grad_norm": 2.397829294204712, - "learning_rate": 2.7785734071205883e-05, - "loss": 0.6672, - "step": 150770 - }, - { - "epoch": 1.3329443589879595, - "grad_norm": 8.323978424072266, - "learning_rate": 2.7784260683534008e-05, - "loss": 0.6131, - "step": 150780 - }, - { - "epoch": 1.3330327622482718, - "grad_norm": 5.0845947265625, - "learning_rate": 2.778278729586214e-05, - "loss": 0.6846, - "step": 150790 - }, - { - "epoch": 1.333121165508584, - "grad_norm": 3.428443431854248, - "learning_rate": 2.778131390819027e-05, - "loss": 0.5184, - "step": 150800 - }, - { - "epoch": 1.333209568768896, - "grad_norm": 1.5157850980758667, - "learning_rate": 2.7779840520518396e-05, - "loss": 0.688, - "step": 150810 - }, - { - "epoch": 1.3332979720292084, - "grad_norm": 2.5739991664886475, - "learning_rate": 2.7778367132846528e-05, - "loss": 0.6031, - "step": 150820 - }, - { - "epoch": 1.3333863752895208, - "grad_norm": 3.486426830291748, - "learning_rate": 2.777689374517466e-05, - "loss": 0.7157, - "step": 150830 - }, - { - "epoch": 1.3334747785498329, - "grad_norm": 1.507509708404541, - "learning_rate": 2.7775420357502784e-05, - "loss": 0.5884, - "step": 150840 - }, - { - "epoch": 1.3335631818101452, - "grad_norm": 3.454697370529175, - "learning_rate": 2.7773946969830916e-05, - "loss": 0.6618, - "step": 150850 - }, - { - "epoch": 1.3336515850704573, - "grad_norm": 2.0091540813446045, - "learning_rate": 2.777247358215904e-05, - "loss": 0.6251, - "step": 150860 - }, - { - "epoch": 1.3337399883307697, - "grad_norm": 1.098272681236267, - "learning_rate": 2.7771000194487173e-05, - "loss": 0.5648, - "step": 150870 - }, - { - "epoch": 1.3338283915910818, - "grad_norm": 18.883617401123047, - "learning_rate": 2.7769526806815305e-05, - "loss": 0.5722, - "step": 150880 - }, - { - "epoch": 1.3339167948513941, - "grad_norm": 1.5560534000396729, - "learning_rate": 2.776805341914343e-05, - "loss": 0.5098, - "step": 150890 - }, - { - "epoch": 1.3340051981117065, - "grad_norm": 1.6480363607406616, - "learning_rate": 2.776658003147156e-05, - "loss": 0.6093, - "step": 150900 - }, - { - "epoch": 1.3340936013720186, - "grad_norm": 6.449525833129883, - "learning_rate": 2.7765106643799693e-05, - "loss": 0.6692, - "step": 150910 - }, - { - "epoch": 1.3341820046323307, - "grad_norm": 10.009638786315918, - "learning_rate": 2.7763633256127818e-05, - "loss": 0.5721, - "step": 150920 - }, - { - "epoch": 1.334270407892643, - "grad_norm": 4.691011428833008, - "learning_rate": 2.776215986845595e-05, - "loss": 0.7491, - "step": 150930 - }, - { - "epoch": 1.3343588111529554, - "grad_norm": 1.4988094568252563, - "learning_rate": 2.776068648078408e-05, - "loss": 0.6663, - "step": 150940 - }, - { - "epoch": 1.3344472144132675, - "grad_norm": 0.885347306728363, - "learning_rate": 2.7759213093112207e-05, - "loss": 0.6074, - "step": 150950 - }, - { - "epoch": 1.3345356176735799, - "grad_norm": 4.787322044372559, - "learning_rate": 2.7757739705440338e-05, - "loss": 0.6732, - "step": 150960 - }, - { - "epoch": 1.334624020933892, - "grad_norm": 2.074153423309326, - "learning_rate": 2.7756266317768463e-05, - "loss": 0.547, - "step": 150970 - }, - { - "epoch": 1.3347124241942043, - "grad_norm": 4.47625207901001, - "learning_rate": 2.7754792930096595e-05, - "loss": 0.5978, - "step": 150980 - }, - { - "epoch": 1.3348008274545164, - "grad_norm": 1.9079277515411377, - "learning_rate": 2.7753319542424727e-05, - "loss": 0.604, - "step": 150990 - }, - { - "epoch": 1.3348892307148288, - "grad_norm": 5.470847129821777, - "learning_rate": 2.775184615475285e-05, - "loss": 0.5262, - "step": 151000 - }, - { - "epoch": 1.3349776339751411, - "grad_norm": 1.6069300174713135, - "learning_rate": 2.7750372767080983e-05, - "loss": 0.5069, - "step": 151010 - }, - { - "epoch": 1.3350660372354533, - "grad_norm": 1.8883270025253296, - "learning_rate": 2.7748899379409115e-05, - "loss": 0.554, - "step": 151020 - }, - { - "epoch": 1.3351544404957654, - "grad_norm": 1.1260170936584473, - "learning_rate": 2.774742599173724e-05, - "loss": 0.5093, - "step": 151030 - }, - { - "epoch": 1.3352428437560777, - "grad_norm": 1.3982582092285156, - "learning_rate": 2.7745952604065372e-05, - "loss": 0.6464, - "step": 151040 - }, - { - "epoch": 1.33533124701639, - "grad_norm": 6.610744953155518, - "learning_rate": 2.7744479216393504e-05, - "loss": 0.5489, - "step": 151050 - }, - { - "epoch": 1.3354196502767022, - "grad_norm": 5.942538738250732, - "learning_rate": 2.774300582872163e-05, - "loss": 0.6697, - "step": 151060 - }, - { - "epoch": 1.3355080535370145, - "grad_norm": 1.7882872819900513, - "learning_rate": 2.774153244104976e-05, - "loss": 0.6564, - "step": 151070 - }, - { - "epoch": 1.3355964567973266, - "grad_norm": 1.5126078128814697, - "learning_rate": 2.7740059053377892e-05, - "loss": 0.5687, - "step": 151080 - }, - { - "epoch": 1.335684860057639, - "grad_norm": 8.000627517700195, - "learning_rate": 2.7738585665706017e-05, - "loss": 0.5835, - "step": 151090 - }, - { - "epoch": 1.335773263317951, - "grad_norm": 4.476342678070068, - "learning_rate": 2.773711227803415e-05, - "loss": 0.4402, - "step": 151100 - }, - { - "epoch": 1.3358616665782634, - "grad_norm": 1.719919204711914, - "learning_rate": 2.7735638890362277e-05, - "loss": 0.4522, - "step": 151110 - }, - { - "epoch": 1.3359500698385758, - "grad_norm": 3.59228515625, - "learning_rate": 2.7734165502690405e-05, - "loss": 0.4962, - "step": 151120 - }, - { - "epoch": 1.336038473098888, - "grad_norm": 1.5816543102264404, - "learning_rate": 2.7732692115018537e-05, - "loss": 0.6337, - "step": 151130 - }, - { - "epoch": 1.3361268763592, - "grad_norm": 3.1262903213500977, - "learning_rate": 2.7731218727346666e-05, - "loss": 0.6649, - "step": 151140 - }, - { - "epoch": 1.3362152796195124, - "grad_norm": 2.3579840660095215, - "learning_rate": 2.7729745339674794e-05, - "loss": 0.6421, - "step": 151150 - }, - { - "epoch": 1.3363036828798247, - "grad_norm": 2.981292963027954, - "learning_rate": 2.7728271952002926e-05, - "loss": 0.6245, - "step": 151160 - }, - { - "epoch": 1.3363920861401368, - "grad_norm": 1.7436307668685913, - "learning_rate": 2.7726798564331054e-05, - "loss": 0.4058, - "step": 151170 - }, - { - "epoch": 1.3364804894004492, - "grad_norm": 4.4524312019348145, - "learning_rate": 2.7725325176659182e-05, - "loss": 0.6272, - "step": 151180 - }, - { - "epoch": 1.3365688926607613, - "grad_norm": 8.533951759338379, - "learning_rate": 2.7723851788987314e-05, - "loss": 0.723, - "step": 151190 - }, - { - "epoch": 1.3366572959210736, - "grad_norm": 2.9351816177368164, - "learning_rate": 2.7722378401315442e-05, - "loss": 0.6489, - "step": 151200 - }, - { - "epoch": 1.3367456991813857, - "grad_norm": 2.2552337646484375, - "learning_rate": 2.772090501364357e-05, - "loss": 0.5402, - "step": 151210 - }, - { - "epoch": 1.336834102441698, - "grad_norm": 10.673256874084473, - "learning_rate": 2.77194316259717e-05, - "loss": 0.5567, - "step": 151220 - }, - { - "epoch": 1.3369225057020102, - "grad_norm": 3.3659915924072266, - "learning_rate": 2.771795823829983e-05, - "loss": 0.6305, - "step": 151230 - }, - { - "epoch": 1.3370109089623226, - "grad_norm": 1.2768335342407227, - "learning_rate": 2.771648485062796e-05, - "loss": 0.5264, - "step": 151240 - }, - { - "epoch": 1.3370993122226347, - "grad_norm": 21.0323486328125, - "learning_rate": 2.7715011462956088e-05, - "loss": 0.5905, - "step": 151250 - }, - { - "epoch": 1.337187715482947, - "grad_norm": 4.157071113586426, - "learning_rate": 2.771353807528422e-05, - "loss": 0.6217, - "step": 151260 - }, - { - "epoch": 1.3372761187432594, - "grad_norm": 5.403841018676758, - "learning_rate": 2.7712064687612348e-05, - "loss": 0.6086, - "step": 151270 - }, - { - "epoch": 1.3373645220035715, - "grad_norm": 8.941508293151855, - "learning_rate": 2.7710591299940476e-05, - "loss": 0.5825, - "step": 151280 - }, - { - "epoch": 1.3374529252638836, - "grad_norm": 0.8912774920463562, - "learning_rate": 2.7709117912268608e-05, - "loss": 0.5628, - "step": 151290 - }, - { - "epoch": 1.337541328524196, - "grad_norm": 4.192302703857422, - "learning_rate": 2.7707644524596736e-05, - "loss": 0.6462, - "step": 151300 - }, - { - "epoch": 1.3376297317845083, - "grad_norm": 2.739774227142334, - "learning_rate": 2.7706171136924865e-05, - "loss": 0.6257, - "step": 151310 - }, - { - "epoch": 1.3377181350448204, - "grad_norm": 4.046346187591553, - "learning_rate": 2.7704697749252996e-05, - "loss": 0.5678, - "step": 151320 - }, - { - "epoch": 1.3378065383051327, - "grad_norm": 2.878293037414551, - "learning_rate": 2.770322436158112e-05, - "loss": 0.6428, - "step": 151330 - }, - { - "epoch": 1.3378949415654449, - "grad_norm": 5.418063640594482, - "learning_rate": 2.7701750973909253e-05, - "loss": 0.6374, - "step": 151340 - }, - { - "epoch": 1.3379833448257572, - "grad_norm": 1.9555840492248535, - "learning_rate": 2.7700277586237385e-05, - "loss": 0.6421, - "step": 151350 - }, - { - "epoch": 1.3380717480860693, - "grad_norm": 3.0847692489624023, - "learning_rate": 2.769880419856551e-05, - "loss": 0.4415, - "step": 151360 - }, - { - "epoch": 1.3381601513463817, - "grad_norm": 1.3546347618103027, - "learning_rate": 2.769733081089364e-05, - "loss": 0.7576, - "step": 151370 - }, - { - "epoch": 1.338248554606694, - "grad_norm": 1.5298864841461182, - "learning_rate": 2.7695857423221773e-05, - "loss": 0.5746, - "step": 151380 - }, - { - "epoch": 1.3383369578670061, - "grad_norm": 2.744459867477417, - "learning_rate": 2.7694384035549898e-05, - "loss": 0.7344, - "step": 151390 - }, - { - "epoch": 1.3384253611273182, - "grad_norm": 1.2843074798583984, - "learning_rate": 2.769291064787803e-05, - "loss": 0.5737, - "step": 151400 - }, - { - "epoch": 1.3385137643876306, - "grad_norm": 1.539597749710083, - "learning_rate": 2.769143726020616e-05, - "loss": 0.6573, - "step": 151410 - }, - { - "epoch": 1.338602167647943, - "grad_norm": 9.883866310119629, - "learning_rate": 2.7689963872534287e-05, - "loss": 0.598, - "step": 151420 - }, - { - "epoch": 1.338690570908255, - "grad_norm": 1.1899820566177368, - "learning_rate": 2.768849048486242e-05, - "loss": 0.6141, - "step": 151430 - }, - { - "epoch": 1.3387789741685674, - "grad_norm": 3.887343168258667, - "learning_rate": 2.768701709719055e-05, - "loss": 0.6354, - "step": 151440 - }, - { - "epoch": 1.3388673774288795, - "grad_norm": 9.095852851867676, - "learning_rate": 2.7685543709518675e-05, - "loss": 0.554, - "step": 151450 - }, - { - "epoch": 1.3389557806891919, - "grad_norm": 1.7072582244873047, - "learning_rate": 2.7684070321846807e-05, - "loss": 0.6264, - "step": 151460 - }, - { - "epoch": 1.339044183949504, - "grad_norm": 2.419516086578369, - "learning_rate": 2.7682596934174932e-05, - "loss": 0.6815, - "step": 151470 - }, - { - "epoch": 1.3391325872098163, - "grad_norm": 11.242193222045898, - "learning_rate": 2.7681123546503064e-05, - "loss": 0.599, - "step": 151480 - }, - { - "epoch": 1.3392209904701287, - "grad_norm": 2.0562210083007812, - "learning_rate": 2.7679650158831195e-05, - "loss": 0.6353, - "step": 151490 - }, - { - "epoch": 1.3393093937304408, - "grad_norm": 5.687093257904053, - "learning_rate": 2.767817677115932e-05, - "loss": 0.5104, - "step": 151500 - }, - { - "epoch": 1.339397796990753, - "grad_norm": 1.5878093242645264, - "learning_rate": 2.7676703383487452e-05, - "loss": 0.5923, - "step": 151510 - }, - { - "epoch": 1.3394862002510652, - "grad_norm": 0.9044582843780518, - "learning_rate": 2.7675229995815584e-05, - "loss": 0.6265, - "step": 151520 - }, - { - "epoch": 1.3395746035113776, - "grad_norm": 3.482861042022705, - "learning_rate": 2.767375660814371e-05, - "loss": 0.6293, - "step": 151530 - }, - { - "epoch": 1.3396630067716897, - "grad_norm": 1.3492008447647095, - "learning_rate": 2.767228322047184e-05, - "loss": 0.5917, - "step": 151540 - }, - { - "epoch": 1.339751410032002, - "grad_norm": 6.387631416320801, - "learning_rate": 2.7670809832799972e-05, - "loss": 0.5207, - "step": 151550 - }, - { - "epoch": 1.3398398132923142, - "grad_norm": 6.104571342468262, - "learning_rate": 2.7669336445128097e-05, - "loss": 0.5676, - "step": 151560 - }, - { - "epoch": 1.3399282165526265, - "grad_norm": 11.630414962768555, - "learning_rate": 2.766786305745623e-05, - "loss": 0.5068, - "step": 151570 - }, - { - "epoch": 1.3400166198129386, - "grad_norm": 6.5402421951293945, - "learning_rate": 2.7666389669784354e-05, - "loss": 0.7379, - "step": 151580 - }, - { - "epoch": 1.340105023073251, - "grad_norm": 2.0985918045043945, - "learning_rate": 2.7664916282112486e-05, - "loss": 0.6612, - "step": 151590 - }, - { - "epoch": 1.3401934263335633, - "grad_norm": 2.4103410243988037, - "learning_rate": 2.7663442894440617e-05, - "loss": 0.5752, - "step": 151600 - }, - { - "epoch": 1.3402818295938754, - "grad_norm": 1.082924485206604, - "learning_rate": 2.7661969506768742e-05, - "loss": 0.5852, - "step": 151610 - }, - { - "epoch": 1.3403702328541875, - "grad_norm": 7.554436206817627, - "learning_rate": 2.7660496119096874e-05, - "loss": 0.588, - "step": 151620 - }, - { - "epoch": 1.3404586361145, - "grad_norm": 1.3122897148132324, - "learning_rate": 2.7659022731425006e-05, - "loss": 0.5607, - "step": 151630 - }, - { - "epoch": 1.3405470393748122, - "grad_norm": 1.1755517721176147, - "learning_rate": 2.765754934375313e-05, - "loss": 0.6246, - "step": 151640 - }, - { - "epoch": 1.3406354426351244, - "grad_norm": 2.0831754207611084, - "learning_rate": 2.7656075956081262e-05, - "loss": 0.6343, - "step": 151650 - }, - { - "epoch": 1.3407238458954367, - "grad_norm": 2.5282142162323, - "learning_rate": 2.7654602568409394e-05, - "loss": 0.6059, - "step": 151660 - }, - { - "epoch": 1.3408122491557488, - "grad_norm": 5.200699806213379, - "learning_rate": 2.765312918073752e-05, - "loss": 0.5954, - "step": 151670 - }, - { - "epoch": 1.3409006524160612, - "grad_norm": 2.8896148204803467, - "learning_rate": 2.765165579306565e-05, - "loss": 0.4922, - "step": 151680 - }, - { - "epoch": 1.3409890556763733, - "grad_norm": 1.2419229745864868, - "learning_rate": 2.7650182405393776e-05, - "loss": 0.5508, - "step": 151690 - }, - { - "epoch": 1.3410774589366856, - "grad_norm": 0.9479629397392273, - "learning_rate": 2.7648709017721908e-05, - "loss": 0.6371, - "step": 151700 - }, - { - "epoch": 1.341165862196998, - "grad_norm": 6.157392978668213, - "learning_rate": 2.764723563005004e-05, - "loss": 0.5914, - "step": 151710 - }, - { - "epoch": 1.34125426545731, - "grad_norm": 1.3271961212158203, - "learning_rate": 2.7645762242378164e-05, - "loss": 0.4498, - "step": 151720 - }, - { - "epoch": 1.3413426687176222, - "grad_norm": 1.174003005027771, - "learning_rate": 2.7644288854706296e-05, - "loss": 0.564, - "step": 151730 - }, - { - "epoch": 1.3414310719779345, - "grad_norm": 1.3384922742843628, - "learning_rate": 2.7642815467034428e-05, - "loss": 0.5889, - "step": 151740 - }, - { - "epoch": 1.3415194752382469, - "grad_norm": 4.7183661460876465, - "learning_rate": 2.7641342079362553e-05, - "loss": 0.5477, - "step": 151750 - }, - { - "epoch": 1.341607878498559, - "grad_norm": 3.477849006652832, - "learning_rate": 2.7639868691690685e-05, - "loss": 0.6169, - "step": 151760 - }, - { - "epoch": 1.3416962817588713, - "grad_norm": 1.535959243774414, - "learning_rate": 2.7638395304018816e-05, - "loss": 0.674, - "step": 151770 - }, - { - "epoch": 1.3417846850191835, - "grad_norm": 3.1993985176086426, - "learning_rate": 2.763692191634694e-05, - "loss": 0.6727, - "step": 151780 - }, - { - "epoch": 1.3418730882794958, - "grad_norm": 1.3129761219024658, - "learning_rate": 2.7635448528675073e-05, - "loss": 0.596, - "step": 151790 - }, - { - "epoch": 1.341961491539808, - "grad_norm": 9.249835014343262, - "learning_rate": 2.7633975141003198e-05, - "loss": 0.6903, - "step": 151800 - }, - { - "epoch": 1.3420498948001203, - "grad_norm": 4.096350193023682, - "learning_rate": 2.763250175333133e-05, - "loss": 0.5566, - "step": 151810 - }, - { - "epoch": 1.3421382980604324, - "grad_norm": 1.807973027229309, - "learning_rate": 2.763102836565946e-05, - "loss": 0.5722, - "step": 151820 - }, - { - "epoch": 1.3422267013207447, - "grad_norm": 2.0240368843078613, - "learning_rate": 2.7629554977987586e-05, - "loss": 0.6585, - "step": 151830 - }, - { - "epoch": 1.3423151045810569, - "grad_norm": 16.842954635620117, - "learning_rate": 2.7628081590315718e-05, - "loss": 0.5216, - "step": 151840 - }, - { - "epoch": 1.3424035078413692, - "grad_norm": 2.1680495738983154, - "learning_rate": 2.762660820264385e-05, - "loss": 0.6971, - "step": 151850 - }, - { - "epoch": 1.3424919111016815, - "grad_norm": 0.9763355255126953, - "learning_rate": 2.7625134814971975e-05, - "loss": 0.4742, - "step": 151860 - }, - { - "epoch": 1.3425803143619937, - "grad_norm": 2.114156484603882, - "learning_rate": 2.7623661427300107e-05, - "loss": 0.7132, - "step": 151870 - }, - { - "epoch": 1.3426687176223058, - "grad_norm": 2.558688163757324, - "learning_rate": 2.762218803962824e-05, - "loss": 0.6599, - "step": 151880 - }, - { - "epoch": 1.3427571208826181, - "grad_norm": 6.453088760375977, - "learning_rate": 2.7620714651956363e-05, - "loss": 0.6011, - "step": 151890 - }, - { - "epoch": 1.3428455241429305, - "grad_norm": 2.327014207839966, - "learning_rate": 2.7619241264284495e-05, - "loss": 0.5812, - "step": 151900 - }, - { - "epoch": 1.3429339274032426, - "grad_norm": 1.6957920789718628, - "learning_rate": 2.7617767876612627e-05, - "loss": 0.7609, - "step": 151910 - }, - { - "epoch": 1.343022330663555, - "grad_norm": 1.9946612119674683, - "learning_rate": 2.7616294488940752e-05, - "loss": 0.6668, - "step": 151920 - }, - { - "epoch": 1.343110733923867, - "grad_norm": 2.5781185626983643, - "learning_rate": 2.7614821101268883e-05, - "loss": 0.5896, - "step": 151930 - }, - { - "epoch": 1.3431991371841794, - "grad_norm": 3.432629108428955, - "learning_rate": 2.761334771359701e-05, - "loss": 0.5381, - "step": 151940 - }, - { - "epoch": 1.3432875404444915, - "grad_norm": 7.795658111572266, - "learning_rate": 2.761187432592514e-05, - "loss": 0.7388, - "step": 151950 - }, - { - "epoch": 1.3433759437048038, - "grad_norm": 1.2835272550582886, - "learning_rate": 2.7610400938253272e-05, - "loss": 0.5521, - "step": 151960 - }, - { - "epoch": 1.3434643469651162, - "grad_norm": 2.343592643737793, - "learning_rate": 2.7608927550581397e-05, - "loss": 0.5599, - "step": 151970 - }, - { - "epoch": 1.3435527502254283, - "grad_norm": 1.9953981637954712, - "learning_rate": 2.760745416290953e-05, - "loss": 0.5386, - "step": 151980 - }, - { - "epoch": 1.3436411534857404, - "grad_norm": 4.4138360023498535, - "learning_rate": 2.760598077523766e-05, - "loss": 0.6223, - "step": 151990 - }, - { - "epoch": 1.3437295567460528, - "grad_norm": 3.9960412979125977, - "learning_rate": 2.7604507387565785e-05, - "loss": 0.5672, - "step": 152000 - }, - { - "epoch": 1.343817960006365, - "grad_norm": 1.3082555532455444, - "learning_rate": 2.7603033999893917e-05, - "loss": 0.5066, - "step": 152010 - }, - { - "epoch": 1.3439063632666772, - "grad_norm": 6.851284027099609, - "learning_rate": 2.760156061222205e-05, - "loss": 0.7054, - "step": 152020 - }, - { - "epoch": 1.3439947665269896, - "grad_norm": 6.909270286560059, - "learning_rate": 2.7600087224550174e-05, - "loss": 0.6548, - "step": 152030 - }, - { - "epoch": 1.3440831697873017, - "grad_norm": 24.7659912109375, - "learning_rate": 2.7598613836878306e-05, - "loss": 0.6857, - "step": 152040 - }, - { - "epoch": 1.344171573047614, - "grad_norm": 1.9135289192199707, - "learning_rate": 2.7597140449206434e-05, - "loss": 0.6329, - "step": 152050 - }, - { - "epoch": 1.3442599763079262, - "grad_norm": 1.4441651105880737, - "learning_rate": 2.7595667061534562e-05, - "loss": 0.5593, - "step": 152060 - }, - { - "epoch": 1.3443483795682385, - "grad_norm": 1.8499969244003296, - "learning_rate": 2.7594193673862694e-05, - "loss": 0.57, - "step": 152070 - }, - { - "epoch": 1.3444367828285508, - "grad_norm": 8.440248489379883, - "learning_rate": 2.7592720286190822e-05, - "loss": 0.5884, - "step": 152080 - }, - { - "epoch": 1.344525186088863, - "grad_norm": 1.3343907594680786, - "learning_rate": 2.759124689851895e-05, - "loss": 0.6163, - "step": 152090 - }, - { - "epoch": 1.344613589349175, - "grad_norm": 3.77996826171875, - "learning_rate": 2.7589773510847082e-05, - "loss": 0.6739, - "step": 152100 - }, - { - "epoch": 1.3447019926094874, - "grad_norm": 2.175771951675415, - "learning_rate": 2.758830012317521e-05, - "loss": 0.7437, - "step": 152110 - }, - { - "epoch": 1.3447903958697998, - "grad_norm": 1.1218180656433105, - "learning_rate": 2.758682673550334e-05, - "loss": 0.6503, - "step": 152120 - }, - { - "epoch": 1.3448787991301119, - "grad_norm": 3.5241992473602295, - "learning_rate": 2.758535334783147e-05, - "loss": 0.4788, - "step": 152130 - }, - { - "epoch": 1.3449672023904242, - "grad_norm": 3.1352739334106445, - "learning_rate": 2.75838799601596e-05, - "loss": 0.5824, - "step": 152140 - }, - { - "epoch": 1.3450556056507363, - "grad_norm": 2.206943988800049, - "learning_rate": 2.7582406572487728e-05, - "loss": 0.5998, - "step": 152150 - }, - { - "epoch": 1.3451440089110487, - "grad_norm": 1.5829147100448608, - "learning_rate": 2.7580933184815856e-05, - "loss": 0.6989, - "step": 152160 - }, - { - "epoch": 1.3452324121713608, - "grad_norm": 2.106628179550171, - "learning_rate": 2.7579459797143988e-05, - "loss": 0.5974, - "step": 152170 - }, - { - "epoch": 1.3453208154316731, - "grad_norm": 6.176105976104736, - "learning_rate": 2.7577986409472116e-05, - "loss": 0.7609, - "step": 152180 - }, - { - "epoch": 1.3454092186919855, - "grad_norm": 1.2982898950576782, - "learning_rate": 2.7576513021800244e-05, - "loss": 0.5646, - "step": 152190 - }, - { - "epoch": 1.3454976219522976, - "grad_norm": 0.9172103404998779, - "learning_rate": 2.7575039634128376e-05, - "loss": 0.6589, - "step": 152200 - }, - { - "epoch": 1.3455860252126097, - "grad_norm": 2.8577544689178467, - "learning_rate": 2.7573566246456504e-05, - "loss": 0.5439, - "step": 152210 - }, - { - "epoch": 1.345674428472922, - "grad_norm": 1.339755654335022, - "learning_rate": 2.7572092858784633e-05, - "loss": 0.6511, - "step": 152220 - }, - { - "epoch": 1.3457628317332344, - "grad_norm": 4.50508451461792, - "learning_rate": 2.7570619471112765e-05, - "loss": 0.585, - "step": 152230 - }, - { - "epoch": 1.3458512349935465, - "grad_norm": 1.6642866134643555, - "learning_rate": 2.7569146083440893e-05, - "loss": 0.6008, - "step": 152240 - }, - { - "epoch": 1.3459396382538589, - "grad_norm": 2.5571939945220947, - "learning_rate": 2.756767269576902e-05, - "loss": 0.6415, - "step": 152250 - }, - { - "epoch": 1.346028041514171, - "grad_norm": 2.508664608001709, - "learning_rate": 2.7566199308097153e-05, - "loss": 0.6126, - "step": 152260 - }, - { - "epoch": 1.3461164447744833, - "grad_norm": 2.102313280105591, - "learning_rate": 2.7564725920425278e-05, - "loss": 0.6892, - "step": 152270 - }, - { - "epoch": 1.3462048480347955, - "grad_norm": 1.6047399044036865, - "learning_rate": 2.756325253275341e-05, - "loss": 0.597, - "step": 152280 - }, - { - "epoch": 1.3462932512951078, - "grad_norm": 3.6406137943267822, - "learning_rate": 2.756177914508154e-05, - "loss": 0.5922, - "step": 152290 - }, - { - "epoch": 1.3463816545554201, - "grad_norm": 3.566967010498047, - "learning_rate": 2.7560305757409666e-05, - "loss": 0.6462, - "step": 152300 - }, - { - "epoch": 1.3464700578157323, - "grad_norm": 1.0806243419647217, - "learning_rate": 2.7558832369737798e-05, - "loss": 0.5552, - "step": 152310 - }, - { - "epoch": 1.3465584610760444, - "grad_norm": 23.99992561340332, - "learning_rate": 2.755735898206593e-05, - "loss": 0.6007, - "step": 152320 - }, - { - "epoch": 1.3466468643363567, - "grad_norm": 4.343580722808838, - "learning_rate": 2.7555885594394055e-05, - "loss": 0.6207, - "step": 152330 - }, - { - "epoch": 1.346735267596669, - "grad_norm": 3.176987409591675, - "learning_rate": 2.7554412206722187e-05, - "loss": 0.5343, - "step": 152340 - }, - { - "epoch": 1.3468236708569812, - "grad_norm": 1.0751079320907593, - "learning_rate": 2.755293881905032e-05, - "loss": 0.5602, - "step": 152350 - }, - { - "epoch": 1.3469120741172935, - "grad_norm": 2.112555742263794, - "learning_rate": 2.7551465431378443e-05, - "loss": 0.4778, - "step": 152360 - }, - { - "epoch": 1.3470004773776056, - "grad_norm": 1.2999556064605713, - "learning_rate": 2.7549992043706575e-05, - "loss": 0.4669, - "step": 152370 - }, - { - "epoch": 1.347088880637918, - "grad_norm": 2.515124559402466, - "learning_rate": 2.7548518656034707e-05, - "loss": 0.585, - "step": 152380 - }, - { - "epoch": 1.34717728389823, - "grad_norm": 19.932363510131836, - "learning_rate": 2.7547045268362832e-05, - "loss": 0.4657, - "step": 152390 - }, - { - "epoch": 1.3472656871585424, - "grad_norm": 1.0412105321884155, - "learning_rate": 2.7545571880690964e-05, - "loss": 0.5654, - "step": 152400 - }, - { - "epoch": 1.3473540904188546, - "grad_norm": 4.872016429901123, - "learning_rate": 2.754409849301909e-05, - "loss": 0.6819, - "step": 152410 - }, - { - "epoch": 1.347442493679167, - "grad_norm": 3.62040114402771, - "learning_rate": 2.754262510534722e-05, - "loss": 0.5932, - "step": 152420 - }, - { - "epoch": 1.347530896939479, - "grad_norm": 10.900782585144043, - "learning_rate": 2.7541151717675352e-05, - "loss": 0.5669, - "step": 152430 - }, - { - "epoch": 1.3476193001997914, - "grad_norm": 13.340954780578613, - "learning_rate": 2.7539678330003477e-05, - "loss": 0.6505, - "step": 152440 - }, - { - "epoch": 1.3477077034601037, - "grad_norm": 1.7349427938461304, - "learning_rate": 2.753820494233161e-05, - "loss": 0.5847, - "step": 152450 - }, - { - "epoch": 1.3477961067204158, - "grad_norm": 9.308643341064453, - "learning_rate": 2.753673155465974e-05, - "loss": 0.8377, - "step": 152460 - }, - { - "epoch": 1.347884509980728, - "grad_norm": 3.295382499694824, - "learning_rate": 2.7535258166987865e-05, - "loss": 0.5619, - "step": 152470 - }, - { - "epoch": 1.3479729132410403, - "grad_norm": 18.71682357788086, - "learning_rate": 2.7533784779315997e-05, - "loss": 0.6208, - "step": 152480 - }, - { - "epoch": 1.3480613165013526, - "grad_norm": 14.244182586669922, - "learning_rate": 2.753231139164413e-05, - "loss": 0.6576, - "step": 152490 - }, - { - "epoch": 1.3481497197616648, - "grad_norm": 1.7026153802871704, - "learning_rate": 2.7530838003972254e-05, - "loss": 0.565, - "step": 152500 - }, - { - "epoch": 1.348238123021977, - "grad_norm": 1.675563097000122, - "learning_rate": 2.7529364616300386e-05, - "loss": 0.7649, - "step": 152510 - }, - { - "epoch": 1.3483265262822892, - "grad_norm": 1.9450074434280396, - "learning_rate": 2.752789122862851e-05, - "loss": 0.8025, - "step": 152520 - }, - { - "epoch": 1.3484149295426016, - "grad_norm": 1.710023045539856, - "learning_rate": 2.7526417840956642e-05, - "loss": 0.5054, - "step": 152530 - }, - { - "epoch": 1.3485033328029137, - "grad_norm": 2.0094408988952637, - "learning_rate": 2.7524944453284774e-05, - "loss": 0.5589, - "step": 152540 - }, - { - "epoch": 1.348591736063226, - "grad_norm": 1.119165301322937, - "learning_rate": 2.75234710656129e-05, - "loss": 0.5645, - "step": 152550 - }, - { - "epoch": 1.3486801393235384, - "grad_norm": 3.8932783603668213, - "learning_rate": 2.752199767794103e-05, - "loss": 0.4985, - "step": 152560 - }, - { - "epoch": 1.3487685425838505, - "grad_norm": 2.324028253555298, - "learning_rate": 2.7520524290269163e-05, - "loss": 0.717, - "step": 152570 - }, - { - "epoch": 1.3488569458441626, - "grad_norm": 9.660073280334473, - "learning_rate": 2.7519050902597287e-05, - "loss": 0.5335, - "step": 152580 - }, - { - "epoch": 1.348945349104475, - "grad_norm": 3.22464656829834, - "learning_rate": 2.751757751492542e-05, - "loss": 0.5849, - "step": 152590 - }, - { - "epoch": 1.3490337523647873, - "grad_norm": 1.327293872833252, - "learning_rate": 2.751610412725355e-05, - "loss": 0.5519, - "step": 152600 - }, - { - "epoch": 1.3491221556250994, - "grad_norm": 12.765607833862305, - "learning_rate": 2.7514630739581676e-05, - "loss": 0.5948, - "step": 152610 - }, - { - "epoch": 1.3492105588854117, - "grad_norm": 2.0518267154693604, - "learning_rate": 2.7513157351909808e-05, - "loss": 0.6375, - "step": 152620 - }, - { - "epoch": 1.3492989621457239, - "grad_norm": 1.2443517446517944, - "learning_rate": 2.7511683964237933e-05, - "loss": 0.6497, - "step": 152630 - }, - { - "epoch": 1.3493873654060362, - "grad_norm": 13.505154609680176, - "learning_rate": 2.7510210576566064e-05, - "loss": 0.6146, - "step": 152640 - }, - { - "epoch": 1.3494757686663483, - "grad_norm": 2.0449235439300537, - "learning_rate": 2.7508737188894196e-05, - "loss": 0.661, - "step": 152650 - }, - { - "epoch": 1.3495641719266607, - "grad_norm": 1.1711230278015137, - "learning_rate": 2.750726380122232e-05, - "loss": 0.6156, - "step": 152660 - }, - { - "epoch": 1.349652575186973, - "grad_norm": 2.5636491775512695, - "learning_rate": 2.7505790413550453e-05, - "loss": 0.6465, - "step": 152670 - }, - { - "epoch": 1.3497409784472851, - "grad_norm": 1.8045976161956787, - "learning_rate": 2.7504317025878585e-05, - "loss": 0.5404, - "step": 152680 - }, - { - "epoch": 1.3498293817075973, - "grad_norm": 1.880570650100708, - "learning_rate": 2.750284363820671e-05, - "loss": 0.5684, - "step": 152690 - }, - { - "epoch": 1.3499177849679096, - "grad_norm": 0.8692885637283325, - "learning_rate": 2.750137025053484e-05, - "loss": 0.5905, - "step": 152700 - }, - { - "epoch": 1.350006188228222, - "grad_norm": 1.9509234428405762, - "learning_rate": 2.7499896862862973e-05, - "loss": 0.6271, - "step": 152710 - }, - { - "epoch": 1.350094591488534, - "grad_norm": 1.200966715812683, - "learning_rate": 2.7498423475191098e-05, - "loss": 0.5505, - "step": 152720 - }, - { - "epoch": 1.3501829947488464, - "grad_norm": 2.834299087524414, - "learning_rate": 2.749695008751923e-05, - "loss": 0.6298, - "step": 152730 - }, - { - "epoch": 1.3502713980091585, - "grad_norm": 4.70060396194458, - "learning_rate": 2.7495476699847355e-05, - "loss": 0.7122, - "step": 152740 - }, - { - "epoch": 1.3503598012694709, - "grad_norm": 5.745482444763184, - "learning_rate": 2.7494003312175486e-05, - "loss": 0.55, - "step": 152750 - }, - { - "epoch": 1.350448204529783, - "grad_norm": 1.6211899518966675, - "learning_rate": 2.7492529924503618e-05, - "loss": 0.608, - "step": 152760 - }, - { - "epoch": 1.3505366077900953, - "grad_norm": 3.1293649673461914, - "learning_rate": 2.7491056536831743e-05, - "loss": 0.593, - "step": 152770 - }, - { - "epoch": 1.3506250110504077, - "grad_norm": 1.8907068967819214, - "learning_rate": 2.7489583149159875e-05, - "loss": 0.6112, - "step": 152780 - }, - { - "epoch": 1.3507134143107198, - "grad_norm": 1.62534499168396, - "learning_rate": 2.7488109761488007e-05, - "loss": 0.6411, - "step": 152790 - }, - { - "epoch": 1.350801817571032, - "grad_norm": 1.6003910303115845, - "learning_rate": 2.748663637381613e-05, - "loss": 0.5716, - "step": 152800 - }, - { - "epoch": 1.3508902208313442, - "grad_norm": 3.8206429481506348, - "learning_rate": 2.7485162986144263e-05, - "loss": 0.6705, - "step": 152810 - }, - { - "epoch": 1.3509786240916566, - "grad_norm": 2.7738959789276123, - "learning_rate": 2.7483689598472395e-05, - "loss": 0.5765, - "step": 152820 - }, - { - "epoch": 1.3510670273519687, - "grad_norm": 13.040949821472168, - "learning_rate": 2.748221621080052e-05, - "loss": 0.6266, - "step": 152830 - }, - { - "epoch": 1.351155430612281, - "grad_norm": 3.785677194595337, - "learning_rate": 2.7480742823128652e-05, - "loss": 0.6482, - "step": 152840 - }, - { - "epoch": 1.3512438338725932, - "grad_norm": 2.183802366256714, - "learning_rate": 2.7479269435456784e-05, - "loss": 0.6982, - "step": 152850 - }, - { - "epoch": 1.3513322371329055, - "grad_norm": 1.7078607082366943, - "learning_rate": 2.747779604778491e-05, - "loss": 0.5505, - "step": 152860 - }, - { - "epoch": 1.3514206403932176, - "grad_norm": 2.5055553913116455, - "learning_rate": 2.747632266011304e-05, - "loss": 0.5533, - "step": 152870 - }, - { - "epoch": 1.35150904365353, - "grad_norm": 2.6654164791107178, - "learning_rate": 2.7474849272441165e-05, - "loss": 0.6827, - "step": 152880 - }, - { - "epoch": 1.3515974469138423, - "grad_norm": 2.8913371562957764, - "learning_rate": 2.7473375884769297e-05, - "loss": 0.5377, - "step": 152890 - }, - { - "epoch": 1.3516858501741544, - "grad_norm": 0.7278856635093689, - "learning_rate": 2.747190249709743e-05, - "loss": 0.4603, - "step": 152900 - }, - { - "epoch": 1.3517742534344666, - "grad_norm": 1.6657037734985352, - "learning_rate": 2.7470429109425554e-05, - "loss": 0.5335, - "step": 152910 - }, - { - "epoch": 1.351862656694779, - "grad_norm": 3.048320770263672, - "learning_rate": 2.7468955721753685e-05, - "loss": 0.6495, - "step": 152920 - }, - { - "epoch": 1.3519510599550912, - "grad_norm": 1.6921703815460205, - "learning_rate": 2.7467482334081817e-05, - "loss": 0.5694, - "step": 152930 - }, - { - "epoch": 1.3520394632154034, - "grad_norm": 4.21888542175293, - "learning_rate": 2.7466008946409942e-05, - "loss": 0.5306, - "step": 152940 - }, - { - "epoch": 1.3521278664757157, - "grad_norm": 1.5649628639221191, - "learning_rate": 2.7464535558738074e-05, - "loss": 0.7282, - "step": 152950 - }, - { - "epoch": 1.3522162697360278, - "grad_norm": 13.663272857666016, - "learning_rate": 2.7463062171066206e-05, - "loss": 0.7725, - "step": 152960 - }, - { - "epoch": 1.3523046729963402, - "grad_norm": 7.526825904846191, - "learning_rate": 2.746158878339433e-05, - "loss": 0.6897, - "step": 152970 - }, - { - "epoch": 1.3523930762566523, - "grad_norm": 1.556532621383667, - "learning_rate": 2.7460115395722462e-05, - "loss": 0.6829, - "step": 152980 - }, - { - "epoch": 1.3524814795169646, - "grad_norm": 7.2153000831604, - "learning_rate": 2.745864200805059e-05, - "loss": 0.5147, - "step": 152990 - }, - { - "epoch": 1.3525698827772767, - "grad_norm": 1.6668345928192139, - "learning_rate": 2.745716862037872e-05, - "loss": 0.57, - "step": 153000 - }, - { - "epoch": 1.352658286037589, - "grad_norm": 5.36071252822876, - "learning_rate": 2.745569523270685e-05, - "loss": 0.5818, - "step": 153010 - }, - { - "epoch": 1.3527466892979012, - "grad_norm": 3.1386351585388184, - "learning_rate": 2.745422184503498e-05, - "loss": 0.6623, - "step": 153020 - }, - { - "epoch": 1.3528350925582135, - "grad_norm": 1.5236743688583374, - "learning_rate": 2.7452748457363107e-05, - "loss": 0.4788, - "step": 153030 - }, - { - "epoch": 1.352923495818526, - "grad_norm": 1.3319661617279053, - "learning_rate": 2.745127506969124e-05, - "loss": 0.6293, - "step": 153040 - }, - { - "epoch": 1.353011899078838, - "grad_norm": 2.216249942779541, - "learning_rate": 2.7449801682019368e-05, - "loss": 0.7444, - "step": 153050 - }, - { - "epoch": 1.3531003023391501, - "grad_norm": 4.037188529968262, - "learning_rate": 2.7448328294347496e-05, - "loss": 0.6448, - "step": 153060 - }, - { - "epoch": 1.3531887055994625, - "grad_norm": 1.6134544610977173, - "learning_rate": 2.7446854906675628e-05, - "loss": 0.5382, - "step": 153070 - }, - { - "epoch": 1.3532771088597748, - "grad_norm": 12.962087631225586, - "learning_rate": 2.7445381519003756e-05, - "loss": 0.6484, - "step": 153080 - }, - { - "epoch": 1.353365512120087, - "grad_norm": 1.941424012184143, - "learning_rate": 2.7443908131331884e-05, - "loss": 0.61, - "step": 153090 - }, - { - "epoch": 1.3534539153803993, - "grad_norm": 7.85158634185791, - "learning_rate": 2.7442434743660013e-05, - "loss": 0.6142, - "step": 153100 - }, - { - "epoch": 1.3535423186407114, - "grad_norm": 17.405946731567383, - "learning_rate": 2.7440961355988144e-05, - "loss": 0.64, - "step": 153110 - }, - { - "epoch": 1.3536307219010237, - "grad_norm": 2.0815839767456055, - "learning_rate": 2.7439487968316273e-05, - "loss": 0.5992, - "step": 153120 - }, - { - "epoch": 1.3537191251613359, - "grad_norm": 1.178189754486084, - "learning_rate": 2.74380145806444e-05, - "loss": 0.6646, - "step": 153130 - }, - { - "epoch": 1.3538075284216482, - "grad_norm": 4.812051296234131, - "learning_rate": 2.7436541192972533e-05, - "loss": 0.6036, - "step": 153140 - }, - { - "epoch": 1.3538959316819605, - "grad_norm": 8.786940574645996, - "learning_rate": 2.743506780530066e-05, - "loss": 0.617, - "step": 153150 - }, - { - "epoch": 1.3539843349422727, - "grad_norm": 3.088794231414795, - "learning_rate": 2.743359441762879e-05, - "loss": 0.5355, - "step": 153160 - }, - { - "epoch": 1.3540727382025848, - "grad_norm": 1.9327174425125122, - "learning_rate": 2.743212102995692e-05, - "loss": 0.4877, - "step": 153170 - }, - { - "epoch": 1.3541611414628971, - "grad_norm": 17.121843338012695, - "learning_rate": 2.743064764228505e-05, - "loss": 0.6143, - "step": 153180 - }, - { - "epoch": 1.3542495447232095, - "grad_norm": 1.1468859910964966, - "learning_rate": 2.7429174254613178e-05, - "loss": 0.5918, - "step": 153190 - }, - { - "epoch": 1.3543379479835216, - "grad_norm": 0.9244375228881836, - "learning_rate": 2.742770086694131e-05, - "loss": 0.636, - "step": 153200 - }, - { - "epoch": 1.354426351243834, - "grad_norm": 1.7848541736602783, - "learning_rate": 2.7426227479269435e-05, - "loss": 0.4962, - "step": 153210 - }, - { - "epoch": 1.354514754504146, - "grad_norm": 2.681871175765991, - "learning_rate": 2.7424754091597566e-05, - "loss": 0.5631, - "step": 153220 - }, - { - "epoch": 1.3546031577644584, - "grad_norm": 1.842670202255249, - "learning_rate": 2.7423280703925698e-05, - "loss": 0.6412, - "step": 153230 - }, - { - "epoch": 1.3546915610247705, - "grad_norm": 3.063415765762329, - "learning_rate": 2.7421807316253823e-05, - "loss": 0.61, - "step": 153240 - }, - { - "epoch": 1.3547799642850828, - "grad_norm": 13.67172622680664, - "learning_rate": 2.7420333928581955e-05, - "loss": 0.8023, - "step": 153250 - }, - { - "epoch": 1.3548683675453952, - "grad_norm": 2.581256866455078, - "learning_rate": 2.7418860540910087e-05, - "loss": 0.4521, - "step": 153260 - }, - { - "epoch": 1.3549567708057073, - "grad_norm": 0.9682684540748596, - "learning_rate": 2.741738715323821e-05, - "loss": 0.6063, - "step": 153270 - }, - { - "epoch": 1.3550451740660194, - "grad_norm": 2.6267082691192627, - "learning_rate": 2.7415913765566343e-05, - "loss": 0.5755, - "step": 153280 - }, - { - "epoch": 1.3551335773263318, - "grad_norm": 14.42762279510498, - "learning_rate": 2.7414440377894475e-05, - "loss": 0.578, - "step": 153290 - }, - { - "epoch": 1.3552219805866441, - "grad_norm": 7.375821113586426, - "learning_rate": 2.74129669902226e-05, - "loss": 0.544, - "step": 153300 - }, - { - "epoch": 1.3553103838469562, - "grad_norm": 3.555516242980957, - "learning_rate": 2.7411493602550732e-05, - "loss": 0.6956, - "step": 153310 - }, - { - "epoch": 1.3553987871072686, - "grad_norm": 2.0015869140625, - "learning_rate": 2.7410020214878864e-05, - "loss": 0.6575, - "step": 153320 - }, - { - "epoch": 1.3554871903675807, - "grad_norm": 3.1677801609039307, - "learning_rate": 2.740854682720699e-05, - "loss": 0.6726, - "step": 153330 - }, - { - "epoch": 1.355575593627893, - "grad_norm": 1.8652198314666748, - "learning_rate": 2.740707343953512e-05, - "loss": 0.7265, - "step": 153340 - }, - { - "epoch": 1.3556639968882052, - "grad_norm": 5.433342933654785, - "learning_rate": 2.7405600051863245e-05, - "loss": 0.585, - "step": 153350 - }, - { - "epoch": 1.3557524001485175, - "grad_norm": 9.56826114654541, - "learning_rate": 2.7404126664191377e-05, - "loss": 0.6202, - "step": 153360 - }, - { - "epoch": 1.3558408034088298, - "grad_norm": 1.6164175271987915, - "learning_rate": 2.740265327651951e-05, - "loss": 0.7429, - "step": 153370 - }, - { - "epoch": 1.355929206669142, - "grad_norm": 2.347870111465454, - "learning_rate": 2.7401179888847634e-05, - "loss": 0.6675, - "step": 153380 - }, - { - "epoch": 1.356017609929454, - "grad_norm": 4.753960609436035, - "learning_rate": 2.7399706501175765e-05, - "loss": 0.6834, - "step": 153390 - }, - { - "epoch": 1.3561060131897664, - "grad_norm": 4.895615100860596, - "learning_rate": 2.7398233113503897e-05, - "loss": 0.6348, - "step": 153400 - }, - { - "epoch": 1.3561944164500788, - "grad_norm": 1.5488193035125732, - "learning_rate": 2.7396759725832022e-05, - "loss": 0.6819, - "step": 153410 - }, - { - "epoch": 1.3562828197103909, - "grad_norm": 3.036613941192627, - "learning_rate": 2.7395286338160154e-05, - "loss": 0.5595, - "step": 153420 - }, - { - "epoch": 1.3563712229707032, - "grad_norm": 3.7243666648864746, - "learning_rate": 2.7393812950488286e-05, - "loss": 0.6052, - "step": 153430 - }, - { - "epoch": 1.3564596262310153, - "grad_norm": 3.369673013687134, - "learning_rate": 2.739233956281641e-05, - "loss": 0.7209, - "step": 153440 - }, - { - "epoch": 1.3565480294913277, - "grad_norm": 1.6963940858840942, - "learning_rate": 2.7390866175144542e-05, - "loss": 0.7019, - "step": 153450 - }, - { - "epoch": 1.3566364327516398, - "grad_norm": 3.1766574382781982, - "learning_rate": 2.7389392787472667e-05, - "loss": 0.4618, - "step": 153460 - }, - { - "epoch": 1.3567248360119522, - "grad_norm": 1.2094073295593262, - "learning_rate": 2.73879193998008e-05, - "loss": 0.6013, - "step": 153470 - }, - { - "epoch": 1.3568132392722645, - "grad_norm": 6.967949867248535, - "learning_rate": 2.738644601212893e-05, - "loss": 0.6755, - "step": 153480 - }, - { - "epoch": 1.3569016425325766, - "grad_norm": 1.9498794078826904, - "learning_rate": 2.7384972624457056e-05, - "loss": 0.5856, - "step": 153490 - }, - { - "epoch": 1.3569900457928887, - "grad_norm": 15.112519264221191, - "learning_rate": 2.7383499236785188e-05, - "loss": 0.6213, - "step": 153500 - }, - { - "epoch": 1.357078449053201, - "grad_norm": 1.4778276681900024, - "learning_rate": 2.738202584911332e-05, - "loss": 0.5613, - "step": 153510 - }, - { - "epoch": 1.3571668523135134, - "grad_norm": 2.414613723754883, - "learning_rate": 2.7380552461441444e-05, - "loss": 0.5225, - "step": 153520 - }, - { - "epoch": 1.3572552555738255, - "grad_norm": 6.527881622314453, - "learning_rate": 2.7379079073769576e-05, - "loss": 0.5431, - "step": 153530 - }, - { - "epoch": 1.3573436588341379, - "grad_norm": 1.3349475860595703, - "learning_rate": 2.7377605686097708e-05, - "loss": 0.7061, - "step": 153540 - }, - { - "epoch": 1.35743206209445, - "grad_norm": 3.2020413875579834, - "learning_rate": 2.7376132298425833e-05, - "loss": 0.5888, - "step": 153550 - }, - { - "epoch": 1.3575204653547623, - "grad_norm": 2.6947829723358154, - "learning_rate": 2.7374658910753964e-05, - "loss": 0.7561, - "step": 153560 - }, - { - "epoch": 1.3576088686150745, - "grad_norm": 4.183035373687744, - "learning_rate": 2.737318552308209e-05, - "loss": 0.6214, - "step": 153570 - }, - { - "epoch": 1.3576972718753868, - "grad_norm": 5.074467182159424, - "learning_rate": 2.737171213541022e-05, - "loss": 0.6269, - "step": 153580 - }, - { - "epoch": 1.357785675135699, - "grad_norm": 1.4343229532241821, - "learning_rate": 2.7370238747738353e-05, - "loss": 0.5334, - "step": 153590 - }, - { - "epoch": 1.3578740783960113, - "grad_norm": 1.9551198482513428, - "learning_rate": 2.7368765360066478e-05, - "loss": 0.6199, - "step": 153600 - }, - { - "epoch": 1.3579624816563234, - "grad_norm": 7.2975993156433105, - "learning_rate": 2.736729197239461e-05, - "loss": 0.6635, - "step": 153610 - }, - { - "epoch": 1.3580508849166357, - "grad_norm": 15.22257137298584, - "learning_rate": 2.736581858472274e-05, - "loss": 0.7037, - "step": 153620 - }, - { - "epoch": 1.358139288176948, - "grad_norm": 4.397807598114014, - "learning_rate": 2.7364345197050866e-05, - "loss": 0.625, - "step": 153630 - }, - { - "epoch": 1.3582276914372602, - "grad_norm": 23.78542709350586, - "learning_rate": 2.7362871809378998e-05, - "loss": 0.6663, - "step": 153640 - }, - { - "epoch": 1.3583160946975725, - "grad_norm": 4.08134126663208, - "learning_rate": 2.736139842170713e-05, - "loss": 0.6032, - "step": 153650 - }, - { - "epoch": 1.3584044979578846, - "grad_norm": 1.7195130586624146, - "learning_rate": 2.7359925034035255e-05, - "loss": 0.6663, - "step": 153660 - }, - { - "epoch": 1.358492901218197, - "grad_norm": 4.62075662612915, - "learning_rate": 2.7358451646363386e-05, - "loss": 0.6765, - "step": 153670 - }, - { - "epoch": 1.358581304478509, - "grad_norm": 1.8698654174804688, - "learning_rate": 2.735697825869151e-05, - "loss": 0.6794, - "step": 153680 - }, - { - "epoch": 1.3586697077388215, - "grad_norm": 8.863204956054688, - "learning_rate": 2.7355504871019643e-05, - "loss": 0.6126, - "step": 153690 - }, - { - "epoch": 1.3587581109991336, - "grad_norm": 3.2313244342803955, - "learning_rate": 2.7354031483347775e-05, - "loss": 0.6543, - "step": 153700 - }, - { - "epoch": 1.358846514259446, - "grad_norm": 11.454544067382812, - "learning_rate": 2.73525580956759e-05, - "loss": 0.6134, - "step": 153710 - }, - { - "epoch": 1.358934917519758, - "grad_norm": 7.350795269012451, - "learning_rate": 2.735108470800403e-05, - "loss": 0.515, - "step": 153720 - }, - { - "epoch": 1.3590233207800704, - "grad_norm": 1.5754120349884033, - "learning_rate": 2.7349611320332163e-05, - "loss": 0.5599, - "step": 153730 - }, - { - "epoch": 1.3591117240403827, - "grad_norm": 1.1125742197036743, - "learning_rate": 2.734813793266029e-05, - "loss": 0.546, - "step": 153740 - }, - { - "epoch": 1.3592001273006948, - "grad_norm": 2.5498523712158203, - "learning_rate": 2.734666454498842e-05, - "loss": 0.4945, - "step": 153750 - }, - { - "epoch": 1.359288530561007, - "grad_norm": 4.113958835601807, - "learning_rate": 2.7345191157316552e-05, - "loss": 0.6604, - "step": 153760 - }, - { - "epoch": 1.3593769338213193, - "grad_norm": 2.057119369506836, - "learning_rate": 2.7343717769644677e-05, - "loss": 0.4741, - "step": 153770 - }, - { - "epoch": 1.3594653370816316, - "grad_norm": 4.867197036743164, - "learning_rate": 2.734224438197281e-05, - "loss": 0.7166, - "step": 153780 - }, - { - "epoch": 1.3595537403419438, - "grad_norm": 2.3347949981689453, - "learning_rate": 2.734077099430094e-05, - "loss": 0.7042, - "step": 153790 - }, - { - "epoch": 1.359642143602256, - "grad_norm": 4.11530876159668, - "learning_rate": 2.7339297606629065e-05, - "loss": 0.7689, - "step": 153800 - }, - { - "epoch": 1.3597305468625682, - "grad_norm": 2.281843423843384, - "learning_rate": 2.7337824218957197e-05, - "loss": 0.6405, - "step": 153810 - }, - { - "epoch": 1.3598189501228806, - "grad_norm": 4.6040449142456055, - "learning_rate": 2.7336350831285322e-05, - "loss": 0.6455, - "step": 153820 - }, - { - "epoch": 1.3599073533831927, - "grad_norm": 2.226038694381714, - "learning_rate": 2.7334877443613454e-05, - "loss": 0.5921, - "step": 153830 - }, - { - "epoch": 1.359995756643505, - "grad_norm": 18.165918350219727, - "learning_rate": 2.7333404055941585e-05, - "loss": 0.5922, - "step": 153840 - }, - { - "epoch": 1.3600841599038174, - "grad_norm": 5.217060565948486, - "learning_rate": 2.733193066826971e-05, - "loss": 0.6279, - "step": 153850 - }, - { - "epoch": 1.3601725631641295, - "grad_norm": 1.1759393215179443, - "learning_rate": 2.7330457280597842e-05, - "loss": 0.5592, - "step": 153860 - }, - { - "epoch": 1.3602609664244416, - "grad_norm": 2.8378050327301025, - "learning_rate": 2.7328983892925974e-05, - "loss": 0.7516, - "step": 153870 - }, - { - "epoch": 1.360349369684754, - "grad_norm": 1.9023247957229614, - "learning_rate": 2.73275105052541e-05, - "loss": 0.6084, - "step": 153880 - }, - { - "epoch": 1.3604377729450663, - "grad_norm": 11.13372802734375, - "learning_rate": 2.732603711758223e-05, - "loss": 0.5913, - "step": 153890 - }, - { - "epoch": 1.3605261762053784, - "grad_norm": 1.1735844612121582, - "learning_rate": 2.7324563729910362e-05, - "loss": 0.7321, - "step": 153900 - }, - { - "epoch": 1.3606145794656908, - "grad_norm": 2.9435930252075195, - "learning_rate": 2.7323090342238487e-05, - "loss": 0.5642, - "step": 153910 - }, - { - "epoch": 1.3607029827260029, - "grad_norm": 9.626900672912598, - "learning_rate": 2.732161695456662e-05, - "loss": 0.6412, - "step": 153920 - }, - { - "epoch": 1.3607913859863152, - "grad_norm": 2.715320110321045, - "learning_rate": 2.7320143566894747e-05, - "loss": 0.6761, - "step": 153930 - }, - { - "epoch": 1.3608797892466273, - "grad_norm": 1.598920464515686, - "learning_rate": 2.7318670179222876e-05, - "loss": 0.6351, - "step": 153940 - }, - { - "epoch": 1.3609681925069397, - "grad_norm": 3.0665156841278076, - "learning_rate": 2.7317196791551007e-05, - "loss": 0.6981, - "step": 153950 - }, - { - "epoch": 1.361056595767252, - "grad_norm": 3.8954806327819824, - "learning_rate": 2.7315723403879136e-05, - "loss": 0.6504, - "step": 153960 - }, - { - "epoch": 1.3611449990275641, - "grad_norm": 3.292116403579712, - "learning_rate": 2.7314250016207264e-05, - "loss": 0.5992, - "step": 153970 - }, - { - "epoch": 1.3612334022878763, - "grad_norm": 1.0852166414260864, - "learning_rate": 2.7312776628535396e-05, - "loss": 0.5186, - "step": 153980 - }, - { - "epoch": 1.3613218055481886, - "grad_norm": 3.1986804008483887, - "learning_rate": 2.7311303240863524e-05, - "loss": 0.6638, - "step": 153990 - }, - { - "epoch": 1.361410208808501, - "grad_norm": 1.7799561023712158, - "learning_rate": 2.7309829853191653e-05, - "loss": 0.6368, - "step": 154000 - }, - { - "epoch": 1.361498612068813, - "grad_norm": 10.748804092407227, - "learning_rate": 2.7308356465519784e-05, - "loss": 0.6364, - "step": 154010 - }, - { - "epoch": 1.3615870153291254, - "grad_norm": 1.81399405002594, - "learning_rate": 2.7306883077847913e-05, - "loss": 0.5802, - "step": 154020 - }, - { - "epoch": 1.3616754185894375, - "grad_norm": 2.1766295433044434, - "learning_rate": 2.730540969017604e-05, - "loss": 0.6595, - "step": 154030 - }, - { - "epoch": 1.3617638218497499, - "grad_norm": 7.2332282066345215, - "learning_rate": 2.730393630250417e-05, - "loss": 0.6622, - "step": 154040 - }, - { - "epoch": 1.361852225110062, - "grad_norm": 4.610494613647461, - "learning_rate": 2.73024629148323e-05, - "loss": 0.5962, - "step": 154050 - }, - { - "epoch": 1.3619406283703743, - "grad_norm": 1.7780160903930664, - "learning_rate": 2.730098952716043e-05, - "loss": 0.5994, - "step": 154060 - }, - { - "epoch": 1.3620290316306867, - "grad_norm": 5.483056545257568, - "learning_rate": 2.7299516139488558e-05, - "loss": 0.6641, - "step": 154070 - }, - { - "epoch": 1.3621174348909988, - "grad_norm": 2.305213451385498, - "learning_rate": 2.729804275181669e-05, - "loss": 0.6899, - "step": 154080 - }, - { - "epoch": 1.362205838151311, - "grad_norm": 2.8561348915100098, - "learning_rate": 2.7296569364144818e-05, - "loss": 0.5653, - "step": 154090 - }, - { - "epoch": 1.3622942414116233, - "grad_norm": 7.68094539642334, - "learning_rate": 2.7295095976472946e-05, - "loss": 0.6154, - "step": 154100 - }, - { - "epoch": 1.3623826446719356, - "grad_norm": 0.6679326295852661, - "learning_rate": 2.7293622588801078e-05, - "loss": 0.5264, - "step": 154110 - }, - { - "epoch": 1.3624710479322477, - "grad_norm": 2.8364195823669434, - "learning_rate": 2.7292149201129206e-05, - "loss": 0.5505, - "step": 154120 - }, - { - "epoch": 1.36255945119256, - "grad_norm": 0.8018041849136353, - "learning_rate": 2.7290675813457335e-05, - "loss": 0.4773, - "step": 154130 - }, - { - "epoch": 1.3626478544528722, - "grad_norm": 1.8438262939453125, - "learning_rate": 2.7289202425785467e-05, - "loss": 0.6894, - "step": 154140 - }, - { - "epoch": 1.3627362577131845, - "grad_norm": 2.2903082370758057, - "learning_rate": 2.7287729038113595e-05, - "loss": 0.658, - "step": 154150 - }, - { - "epoch": 1.3628246609734966, - "grad_norm": 6.520436763763428, - "learning_rate": 2.7286255650441723e-05, - "loss": 0.668, - "step": 154160 - }, - { - "epoch": 1.362913064233809, - "grad_norm": 1.4125345945358276, - "learning_rate": 2.7284782262769855e-05, - "loss": 0.555, - "step": 154170 - }, - { - "epoch": 1.3630014674941213, - "grad_norm": 4.110620975494385, - "learning_rate": 2.728330887509798e-05, - "loss": 0.6559, - "step": 154180 - }, - { - "epoch": 1.3630898707544334, - "grad_norm": 2.6496288776397705, - "learning_rate": 2.7281835487426112e-05, - "loss": 0.6572, - "step": 154190 - }, - { - "epoch": 1.3631782740147456, - "grad_norm": 1.4097830057144165, - "learning_rate": 2.7280362099754243e-05, - "loss": 0.5737, - "step": 154200 - }, - { - "epoch": 1.363266677275058, - "grad_norm": 2.554978370666504, - "learning_rate": 2.727888871208237e-05, - "loss": 0.5654, - "step": 154210 - }, - { - "epoch": 1.3633550805353702, - "grad_norm": 4.791134834289551, - "learning_rate": 2.72774153244105e-05, - "loss": 0.798, - "step": 154220 - }, - { - "epoch": 1.3634434837956824, - "grad_norm": 5.3456621170043945, - "learning_rate": 2.7275941936738632e-05, - "loss": 0.4949, - "step": 154230 - }, - { - "epoch": 1.3635318870559947, - "grad_norm": 1.2459648847579956, - "learning_rate": 2.7274468549066757e-05, - "loss": 0.6048, - "step": 154240 - }, - { - "epoch": 1.3636202903163068, - "grad_norm": 3.840247869491577, - "learning_rate": 2.727299516139489e-05, - "loss": 0.6245, - "step": 154250 - }, - { - "epoch": 1.3637086935766192, - "grad_norm": 5.195685863494873, - "learning_rate": 2.727152177372302e-05, - "loss": 0.5703, - "step": 154260 - }, - { - "epoch": 1.3637970968369313, - "grad_norm": 1.270114541053772, - "learning_rate": 2.7270048386051145e-05, - "loss": 0.5616, - "step": 154270 - }, - { - "epoch": 1.3638855000972436, - "grad_norm": 2.738403081893921, - "learning_rate": 2.7268574998379277e-05, - "loss": 0.6586, - "step": 154280 - }, - { - "epoch": 1.3639739033575558, - "grad_norm": 2.4280507564544678, - "learning_rate": 2.7267101610707402e-05, - "loss": 0.5508, - "step": 154290 - }, - { - "epoch": 1.364062306617868, - "grad_norm": 3.822091579437256, - "learning_rate": 2.7265628223035534e-05, - "loss": 0.4753, - "step": 154300 - }, - { - "epoch": 1.3641507098781802, - "grad_norm": 9.027368545532227, - "learning_rate": 2.7264154835363666e-05, - "loss": 0.5569, - "step": 154310 - }, - { - "epoch": 1.3642391131384926, - "grad_norm": 9.11761474609375, - "learning_rate": 2.726268144769179e-05, - "loss": 0.7106, - "step": 154320 - }, - { - "epoch": 1.364327516398805, - "grad_norm": 1.0846797227859497, - "learning_rate": 2.7261208060019922e-05, - "loss": 0.5674, - "step": 154330 - }, - { - "epoch": 1.364415919659117, - "grad_norm": 1.4489777088165283, - "learning_rate": 2.7259734672348054e-05, - "loss": 0.5001, - "step": 154340 - }, - { - "epoch": 1.3645043229194291, - "grad_norm": 3.4873478412628174, - "learning_rate": 2.725826128467618e-05, - "loss": 0.6888, - "step": 154350 - }, - { - "epoch": 1.3645927261797415, - "grad_norm": 3.128005027770996, - "learning_rate": 2.725678789700431e-05, - "loss": 0.7207, - "step": 154360 - }, - { - "epoch": 1.3646811294400538, - "grad_norm": 4.4434428215026855, - "learning_rate": 2.7255314509332442e-05, - "loss": 0.7486, - "step": 154370 - }, - { - "epoch": 1.364769532700366, - "grad_norm": 7.020805358886719, - "learning_rate": 2.7253841121660567e-05, - "loss": 0.6439, - "step": 154380 - }, - { - "epoch": 1.3648579359606783, - "grad_norm": 1.9832072257995605, - "learning_rate": 2.72523677339887e-05, - "loss": 0.6318, - "step": 154390 - }, - { - "epoch": 1.3649463392209904, - "grad_norm": 8.416921615600586, - "learning_rate": 2.7250894346316824e-05, - "loss": 0.7716, - "step": 154400 - }, - { - "epoch": 1.3650347424813027, - "grad_norm": 17.864105224609375, - "learning_rate": 2.7249420958644956e-05, - "loss": 0.5384, - "step": 154410 - }, - { - "epoch": 1.3651231457416149, - "grad_norm": 2.9790709018707275, - "learning_rate": 2.7247947570973088e-05, - "loss": 0.6522, - "step": 154420 - }, - { - "epoch": 1.3652115490019272, - "grad_norm": 1.2897685766220093, - "learning_rate": 2.7246474183301213e-05, - "loss": 0.7234, - "step": 154430 - }, - { - "epoch": 1.3652999522622395, - "grad_norm": 12.144488334655762, - "learning_rate": 2.7245000795629344e-05, - "loss": 0.6173, - "step": 154440 - }, - { - "epoch": 1.3653883555225517, - "grad_norm": 8.407711029052734, - "learning_rate": 2.7243527407957476e-05, - "loss": 0.5009, - "step": 154450 - }, - { - "epoch": 1.3654767587828638, - "grad_norm": 0.5981799960136414, - "learning_rate": 2.72420540202856e-05, - "loss": 0.7288, - "step": 154460 - }, - { - "epoch": 1.3655651620431761, - "grad_norm": 4.110251426696777, - "learning_rate": 2.7240580632613733e-05, - "loss": 0.5727, - "step": 154470 - }, - { - "epoch": 1.3656535653034885, - "grad_norm": 2.2072997093200684, - "learning_rate": 2.7239107244941864e-05, - "loss": 0.5492, - "step": 154480 - }, - { - "epoch": 1.3657419685638006, - "grad_norm": 3.550320863723755, - "learning_rate": 2.723763385726999e-05, - "loss": 0.6136, - "step": 154490 - }, - { - "epoch": 1.365830371824113, - "grad_norm": 2.081270933151245, - "learning_rate": 2.723616046959812e-05, - "loss": 0.5203, - "step": 154500 - }, - { - "epoch": 1.365918775084425, - "grad_norm": 1.412699818611145, - "learning_rate": 2.7234687081926246e-05, - "loss": 0.5462, - "step": 154510 - }, - { - "epoch": 1.3660071783447374, - "grad_norm": 11.623519897460938, - "learning_rate": 2.7233213694254378e-05, - "loss": 0.4878, - "step": 154520 - }, - { - "epoch": 1.3660955816050495, - "grad_norm": 1.180148959159851, - "learning_rate": 2.723174030658251e-05, - "loss": 0.6776, - "step": 154530 - }, - { - "epoch": 1.3661839848653619, - "grad_norm": 4.216616153717041, - "learning_rate": 2.7230266918910635e-05, - "loss": 0.7044, - "step": 154540 - }, - { - "epoch": 1.3662723881256742, - "grad_norm": 3.270925521850586, - "learning_rate": 2.7228793531238766e-05, - "loss": 0.6919, - "step": 154550 - }, - { - "epoch": 1.3663607913859863, - "grad_norm": 1.9662703275680542, - "learning_rate": 2.7227320143566898e-05, - "loss": 0.6106, - "step": 154560 - }, - { - "epoch": 1.3664491946462984, - "grad_norm": 7.437923431396484, - "learning_rate": 2.7225846755895023e-05, - "loss": 0.5622, - "step": 154570 - }, - { - "epoch": 1.3665375979066108, - "grad_norm": 2.8365561962127686, - "learning_rate": 2.7224373368223155e-05, - "loss": 0.522, - "step": 154580 - }, - { - "epoch": 1.3666260011669231, - "grad_norm": 3.4499101638793945, - "learning_rate": 2.7222899980551287e-05, - "loss": 0.6886, - "step": 154590 - }, - { - "epoch": 1.3667144044272352, - "grad_norm": 2.752417802810669, - "learning_rate": 2.722142659287941e-05, - "loss": 0.6583, - "step": 154600 - }, - { - "epoch": 1.3668028076875476, - "grad_norm": 2.1933319568634033, - "learning_rate": 2.7219953205207543e-05, - "loss": 0.6067, - "step": 154610 - }, - { - "epoch": 1.3668912109478597, - "grad_norm": 6.355262756347656, - "learning_rate": 2.7218479817535675e-05, - "loss": 0.6092, - "step": 154620 - }, - { - "epoch": 1.366979614208172, - "grad_norm": 5.629115104675293, - "learning_rate": 2.72170064298638e-05, - "loss": 0.5589, - "step": 154630 - }, - { - "epoch": 1.3670680174684842, - "grad_norm": 1.2412418127059937, - "learning_rate": 2.721553304219193e-05, - "loss": 0.6126, - "step": 154640 - }, - { - "epoch": 1.3671564207287965, - "grad_norm": 6.742635250091553, - "learning_rate": 2.7214059654520057e-05, - "loss": 0.5539, - "step": 154650 - }, - { - "epoch": 1.3672448239891088, - "grad_norm": 3.1886813640594482, - "learning_rate": 2.721258626684819e-05, - "loss": 0.5567, - "step": 154660 - }, - { - "epoch": 1.367333227249421, - "grad_norm": 1.7838319540023804, - "learning_rate": 2.721111287917632e-05, - "loss": 0.7247, - "step": 154670 - }, - { - "epoch": 1.367421630509733, - "grad_norm": 3.322016954421997, - "learning_rate": 2.7209639491504445e-05, - "loss": 0.5633, - "step": 154680 - }, - { - "epoch": 1.3675100337700454, - "grad_norm": 4.518759727478027, - "learning_rate": 2.7208166103832577e-05, - "loss": 0.537, - "step": 154690 - }, - { - "epoch": 1.3675984370303578, - "grad_norm": 16.524864196777344, - "learning_rate": 2.720669271616071e-05, - "loss": 0.5959, - "step": 154700 - }, - { - "epoch": 1.36768684029067, - "grad_norm": 16.19443130493164, - "learning_rate": 2.7205219328488834e-05, - "loss": 0.6194, - "step": 154710 - }, - { - "epoch": 1.3677752435509822, - "grad_norm": 1.410196304321289, - "learning_rate": 2.7203745940816965e-05, - "loss": 0.6283, - "step": 154720 - }, - { - "epoch": 1.3678636468112944, - "grad_norm": 2.368764638900757, - "learning_rate": 2.7202272553145097e-05, - "loss": 0.6763, - "step": 154730 - }, - { - "epoch": 1.3679520500716067, - "grad_norm": 2.4012529850006104, - "learning_rate": 2.7200799165473222e-05, - "loss": 0.5435, - "step": 154740 - }, - { - "epoch": 1.3680404533319188, - "grad_norm": 0.8090259432792664, - "learning_rate": 2.7199325777801354e-05, - "loss": 0.5123, - "step": 154750 - }, - { - "epoch": 1.3681288565922312, - "grad_norm": 2.2534990310668945, - "learning_rate": 2.719785239012948e-05, - "loss": 0.4984, - "step": 154760 - }, - { - "epoch": 1.3682172598525435, - "grad_norm": 1.3646174669265747, - "learning_rate": 2.719637900245761e-05, - "loss": 0.5332, - "step": 154770 - }, - { - "epoch": 1.3683056631128556, - "grad_norm": 2.0068867206573486, - "learning_rate": 2.7194905614785742e-05, - "loss": 0.5136, - "step": 154780 - }, - { - "epoch": 1.3683940663731677, - "grad_norm": 4.147223949432373, - "learning_rate": 2.7193432227113867e-05, - "loss": 0.628, - "step": 154790 - }, - { - "epoch": 1.36848246963348, - "grad_norm": 1.8314428329467773, - "learning_rate": 2.7191958839442e-05, - "loss": 0.6477, - "step": 154800 - }, - { - "epoch": 1.3685708728937924, - "grad_norm": 4.222271919250488, - "learning_rate": 2.719048545177013e-05, - "loss": 0.6418, - "step": 154810 - }, - { - "epoch": 1.3686592761541045, - "grad_norm": 2.435983419418335, - "learning_rate": 2.7189012064098256e-05, - "loss": 0.7608, - "step": 154820 - }, - { - "epoch": 1.3687476794144169, - "grad_norm": 6.38040828704834, - "learning_rate": 2.7187538676426387e-05, - "loss": 0.6733, - "step": 154830 - }, - { - "epoch": 1.368836082674729, - "grad_norm": 8.850046157836914, - "learning_rate": 2.718606528875452e-05, - "loss": 0.6012, - "step": 154840 - }, - { - "epoch": 1.3689244859350413, - "grad_norm": 3.2060370445251465, - "learning_rate": 2.7184591901082644e-05, - "loss": 0.5737, - "step": 154850 - }, - { - "epoch": 1.3690128891953535, - "grad_norm": 1.6039284467697144, - "learning_rate": 2.7183118513410776e-05, - "loss": 0.5803, - "step": 154860 - }, - { - "epoch": 1.3691012924556658, - "grad_norm": 8.194977760314941, - "learning_rate": 2.7181645125738904e-05, - "loss": 0.6032, - "step": 154870 - }, - { - "epoch": 1.369189695715978, - "grad_norm": 1.7717314958572388, - "learning_rate": 2.7180171738067032e-05, - "loss": 0.6984, - "step": 154880 - }, - { - "epoch": 1.3692780989762903, - "grad_norm": 3.3422884941101074, - "learning_rate": 2.7178698350395164e-05, - "loss": 0.7046, - "step": 154890 - }, - { - "epoch": 1.3693665022366024, - "grad_norm": 3.9236302375793457, - "learning_rate": 2.7177224962723293e-05, - "loss": 0.5878, - "step": 154900 - }, - { - "epoch": 1.3694549054969147, - "grad_norm": 2.0146372318267822, - "learning_rate": 2.717575157505142e-05, - "loss": 0.6033, - "step": 154910 - }, - { - "epoch": 1.369543308757227, - "grad_norm": 1.5420422554016113, - "learning_rate": 2.7174278187379553e-05, - "loss": 0.6024, - "step": 154920 - }, - { - "epoch": 1.3696317120175392, - "grad_norm": 1.5074349641799927, - "learning_rate": 2.717280479970768e-05, - "loss": 0.5833, - "step": 154930 - }, - { - "epoch": 1.3697201152778513, - "grad_norm": 3.9746596813201904, - "learning_rate": 2.717133141203581e-05, - "loss": 0.5922, - "step": 154940 - }, - { - "epoch": 1.3698085185381637, - "grad_norm": 3.6697375774383545, - "learning_rate": 2.716985802436394e-05, - "loss": 0.5826, - "step": 154950 - }, - { - "epoch": 1.369896921798476, - "grad_norm": 4.591105937957764, - "learning_rate": 2.716838463669207e-05, - "loss": 0.5598, - "step": 154960 - }, - { - "epoch": 1.3699853250587881, - "grad_norm": 1.2530019283294678, - "learning_rate": 2.7166911249020198e-05, - "loss": 0.6354, - "step": 154970 - }, - { - "epoch": 1.3700737283191005, - "grad_norm": 1.9378292560577393, - "learning_rate": 2.7165437861348326e-05, - "loss": 0.5806, - "step": 154980 - }, - { - "epoch": 1.3701621315794126, - "grad_norm": 1.188019037246704, - "learning_rate": 2.7163964473676458e-05, - "loss": 0.636, - "step": 154990 - }, - { - "epoch": 1.370250534839725, - "grad_norm": 1.4662915468215942, - "learning_rate": 2.7162491086004586e-05, - "loss": 0.6019, - "step": 155000 - }, - { - "epoch": 1.370338938100037, - "grad_norm": 6.8162994384765625, - "learning_rate": 2.7161017698332715e-05, - "loss": 0.607, - "step": 155010 - }, - { - "epoch": 1.3704273413603494, - "grad_norm": 1.2052501440048218, - "learning_rate": 2.7159544310660846e-05, - "loss": 0.5793, - "step": 155020 - }, - { - "epoch": 1.3705157446206617, - "grad_norm": 4.539021968841553, - "learning_rate": 2.7158070922988975e-05, - "loss": 0.4963, - "step": 155030 - }, - { - "epoch": 1.3706041478809738, - "grad_norm": 10.318037033081055, - "learning_rate": 2.7156597535317103e-05, - "loss": 0.5552, - "step": 155040 - }, - { - "epoch": 1.370692551141286, - "grad_norm": 1.9288170337677002, - "learning_rate": 2.7155124147645235e-05, - "loss": 0.5725, - "step": 155050 - }, - { - "epoch": 1.3707809544015983, - "grad_norm": 1.8394007682800293, - "learning_rate": 2.7153650759973363e-05, - "loss": 0.5495, - "step": 155060 - }, - { - "epoch": 1.3708693576619106, - "grad_norm": 26.70475196838379, - "learning_rate": 2.715217737230149e-05, - "loss": 0.6242, - "step": 155070 - }, - { - "epoch": 1.3709577609222228, - "grad_norm": 1.2042925357818604, - "learning_rate": 2.7150703984629623e-05, - "loss": 0.5535, - "step": 155080 - }, - { - "epoch": 1.371046164182535, - "grad_norm": 4.55109167098999, - "learning_rate": 2.714923059695775e-05, - "loss": 0.4343, - "step": 155090 - }, - { - "epoch": 1.3711345674428472, - "grad_norm": 3.5097484588623047, - "learning_rate": 2.714775720928588e-05, - "loss": 0.7697, - "step": 155100 - }, - { - "epoch": 1.3712229707031596, - "grad_norm": 1.5943996906280518, - "learning_rate": 2.7146283821614012e-05, - "loss": 0.6223, - "step": 155110 - }, - { - "epoch": 1.3713113739634717, - "grad_norm": 1.7234963178634644, - "learning_rate": 2.7144810433942137e-05, - "loss": 0.6311, - "step": 155120 - }, - { - "epoch": 1.371399777223784, - "grad_norm": 3.5486643314361572, - "learning_rate": 2.714333704627027e-05, - "loss": 0.5866, - "step": 155130 - }, - { - "epoch": 1.3714881804840964, - "grad_norm": 4.045604705810547, - "learning_rate": 2.71418636585984e-05, - "loss": 0.6922, - "step": 155140 - }, - { - "epoch": 1.3715765837444085, - "grad_norm": 23.683698654174805, - "learning_rate": 2.7140390270926525e-05, - "loss": 0.7124, - "step": 155150 - }, - { - "epoch": 1.3716649870047206, - "grad_norm": 5.036553382873535, - "learning_rate": 2.7138916883254657e-05, - "loss": 0.6982, - "step": 155160 - }, - { - "epoch": 1.371753390265033, - "grad_norm": 1.9953233003616333, - "learning_rate": 2.713744349558279e-05, - "loss": 0.6876, - "step": 155170 - }, - { - "epoch": 1.3718417935253453, - "grad_norm": 1.812619924545288, - "learning_rate": 2.7135970107910914e-05, - "loss": 0.7092, - "step": 155180 - }, - { - "epoch": 1.3719301967856574, - "grad_norm": 2.9480655193328857, - "learning_rate": 2.7134496720239045e-05, - "loss": 0.7175, - "step": 155190 - }, - { - "epoch": 1.3720186000459698, - "grad_norm": 1.8385483026504517, - "learning_rate": 2.7133023332567177e-05, - "loss": 0.6191, - "step": 155200 - }, - { - "epoch": 1.3721070033062819, - "grad_norm": 2.125201940536499, - "learning_rate": 2.7131549944895302e-05, - "loss": 0.7503, - "step": 155210 - }, - { - "epoch": 1.3721954065665942, - "grad_norm": 2.1805617809295654, - "learning_rate": 2.7130076557223434e-05, - "loss": 0.6716, - "step": 155220 - }, - { - "epoch": 1.3722838098269063, - "grad_norm": 4.144437313079834, - "learning_rate": 2.712860316955156e-05, - "loss": 0.7142, - "step": 155230 - }, - { - "epoch": 1.3723722130872187, - "grad_norm": 6.431118965148926, - "learning_rate": 2.712712978187969e-05, - "loss": 0.6927, - "step": 155240 - }, - { - "epoch": 1.372460616347531, - "grad_norm": 2.286168336868286, - "learning_rate": 2.7125656394207822e-05, - "loss": 0.6384, - "step": 155250 - }, - { - "epoch": 1.3725490196078431, - "grad_norm": 2.2630951404571533, - "learning_rate": 2.7124183006535947e-05, - "loss": 0.6558, - "step": 155260 - }, - { - "epoch": 1.3726374228681553, - "grad_norm": 1.3777186870574951, - "learning_rate": 2.712270961886408e-05, - "loss": 0.6264, - "step": 155270 - }, - { - "epoch": 1.3727258261284676, - "grad_norm": 0.9174994230270386, - "learning_rate": 2.712123623119221e-05, - "loss": 0.5234, - "step": 155280 - }, - { - "epoch": 1.37281422938878, - "grad_norm": 19.313364028930664, - "learning_rate": 2.7119762843520336e-05, - "loss": 0.5636, - "step": 155290 - }, - { - "epoch": 1.372902632649092, - "grad_norm": 3.4840359687805176, - "learning_rate": 2.7118289455848467e-05, - "loss": 0.6314, - "step": 155300 - }, - { - "epoch": 1.3729910359094044, - "grad_norm": 1.7505651712417603, - "learning_rate": 2.71168160681766e-05, - "loss": 0.6878, - "step": 155310 - }, - { - "epoch": 1.3730794391697165, - "grad_norm": 2.405698537826538, - "learning_rate": 2.7115342680504724e-05, - "loss": 0.5962, - "step": 155320 - }, - { - "epoch": 1.3731678424300289, - "grad_norm": 3.2754852771759033, - "learning_rate": 2.7113869292832856e-05, - "loss": 0.6436, - "step": 155330 - }, - { - "epoch": 1.373256245690341, - "grad_norm": 3.2899110317230225, - "learning_rate": 2.711239590516098e-05, - "loss": 0.6518, - "step": 155340 - }, - { - "epoch": 1.3733446489506533, - "grad_norm": 6.16387939453125, - "learning_rate": 2.7110922517489113e-05, - "loss": 0.4967, - "step": 155350 - }, - { - "epoch": 1.3734330522109657, - "grad_norm": 1.9771013259887695, - "learning_rate": 2.7109449129817244e-05, - "loss": 0.6309, - "step": 155360 - }, - { - "epoch": 1.3735214554712778, - "grad_norm": 5.630110263824463, - "learning_rate": 2.710797574214537e-05, - "loss": 0.6041, - "step": 155370 - }, - { - "epoch": 1.37360985873159, - "grad_norm": 1.4134987592697144, - "learning_rate": 2.71065023544735e-05, - "loss": 0.703, - "step": 155380 - }, - { - "epoch": 1.3736982619919023, - "grad_norm": 1.9246699810028076, - "learning_rate": 2.7105028966801633e-05, - "loss": 0.5857, - "step": 155390 - }, - { - "epoch": 1.3737866652522146, - "grad_norm": 2.85746431350708, - "learning_rate": 2.7103555579129758e-05, - "loss": 0.5657, - "step": 155400 - }, - { - "epoch": 1.3738750685125267, - "grad_norm": 4.268984317779541, - "learning_rate": 2.710208219145789e-05, - "loss": 0.5559, - "step": 155410 - }, - { - "epoch": 1.373963471772839, - "grad_norm": 9.158341407775879, - "learning_rate": 2.710060880378602e-05, - "loss": 0.7064, - "step": 155420 - }, - { - "epoch": 1.3740518750331512, - "grad_norm": 3.0119051933288574, - "learning_rate": 2.7099135416114146e-05, - "loss": 0.6397, - "step": 155430 - }, - { - "epoch": 1.3741402782934635, - "grad_norm": 5.203845024108887, - "learning_rate": 2.7097662028442278e-05, - "loss": 0.6357, - "step": 155440 - }, - { - "epoch": 1.3742286815537756, - "grad_norm": 2.26387357711792, - "learning_rate": 2.7096188640770403e-05, - "loss": 0.6279, - "step": 155450 - }, - { - "epoch": 1.374317084814088, - "grad_norm": 19.37293243408203, - "learning_rate": 2.7094715253098535e-05, - "loss": 0.6368, - "step": 155460 - }, - { - "epoch": 1.3744054880744, - "grad_norm": 2.113950729370117, - "learning_rate": 2.7093241865426666e-05, - "loss": 0.5691, - "step": 155470 - }, - { - "epoch": 1.3744938913347124, - "grad_norm": 2.456430435180664, - "learning_rate": 2.709176847775479e-05, - "loss": 0.5471, - "step": 155480 - }, - { - "epoch": 1.3745822945950246, - "grad_norm": 1.3330222368240356, - "learning_rate": 2.7090295090082923e-05, - "loss": 0.6192, - "step": 155490 - }, - { - "epoch": 1.374670697855337, - "grad_norm": 3.0857956409454346, - "learning_rate": 2.7088821702411055e-05, - "loss": 0.7707, - "step": 155500 - }, - { - "epoch": 1.3747591011156493, - "grad_norm": 1.8446743488311768, - "learning_rate": 2.708734831473918e-05, - "loss": 0.6216, - "step": 155510 - }, - { - "epoch": 1.3748475043759614, - "grad_norm": 4.19819974899292, - "learning_rate": 2.708587492706731e-05, - "loss": 0.6936, - "step": 155520 - }, - { - "epoch": 1.3749359076362735, - "grad_norm": 1.715083122253418, - "learning_rate": 2.7084401539395443e-05, - "loss": 0.704, - "step": 155530 - }, - { - "epoch": 1.3750243108965858, - "grad_norm": 1.6971405744552612, - "learning_rate": 2.7082928151723568e-05, - "loss": 0.5975, - "step": 155540 - }, - { - "epoch": 1.3751127141568982, - "grad_norm": 4.815489768981934, - "learning_rate": 2.70814547640517e-05, - "loss": 0.7434, - "step": 155550 - }, - { - "epoch": 1.3752011174172103, - "grad_norm": 2.8287346363067627, - "learning_rate": 2.7079981376379832e-05, - "loss": 0.6777, - "step": 155560 - }, - { - "epoch": 1.3752895206775226, - "grad_norm": 19.006778717041016, - "learning_rate": 2.7078507988707957e-05, - "loss": 0.6244, - "step": 155570 - }, - { - "epoch": 1.3753779239378348, - "grad_norm": 10.056194305419922, - "learning_rate": 2.707703460103609e-05, - "loss": 0.5373, - "step": 155580 - }, - { - "epoch": 1.375466327198147, - "grad_norm": 5.858008861541748, - "learning_rate": 2.7075561213364213e-05, - "loss": 0.5824, - "step": 155590 - }, - { - "epoch": 1.3755547304584592, - "grad_norm": 1.447767972946167, - "learning_rate": 2.7074087825692345e-05, - "loss": 0.6308, - "step": 155600 - }, - { - "epoch": 1.3756431337187716, - "grad_norm": 2.093517541885376, - "learning_rate": 2.7072614438020477e-05, - "loss": 0.776, - "step": 155610 - }, - { - "epoch": 1.375731536979084, - "grad_norm": 2.2905070781707764, - "learning_rate": 2.7071141050348602e-05, - "loss": 0.7129, - "step": 155620 - }, - { - "epoch": 1.375819940239396, - "grad_norm": 2.227703809738159, - "learning_rate": 2.7069667662676734e-05, - "loss": 0.5256, - "step": 155630 - }, - { - "epoch": 1.3759083434997081, - "grad_norm": 2.6590676307678223, - "learning_rate": 2.7068194275004865e-05, - "loss": 0.6504, - "step": 155640 - }, - { - "epoch": 1.3759967467600205, - "grad_norm": 1.292912483215332, - "learning_rate": 2.706672088733299e-05, - "loss": 0.5684, - "step": 155650 - }, - { - "epoch": 1.3760851500203328, - "grad_norm": 10.864413261413574, - "learning_rate": 2.7065247499661122e-05, - "loss": 0.5916, - "step": 155660 - }, - { - "epoch": 1.376173553280645, - "grad_norm": 1.0657316446304321, - "learning_rate": 2.7063774111989254e-05, - "loss": 0.5872, - "step": 155670 - }, - { - "epoch": 1.3762619565409573, - "grad_norm": 1.5046132802963257, - "learning_rate": 2.706230072431738e-05, - "loss": 0.6135, - "step": 155680 - }, - { - "epoch": 1.3763503598012694, - "grad_norm": 2.8958046436309814, - "learning_rate": 2.706082733664551e-05, - "loss": 0.5094, - "step": 155690 - }, - { - "epoch": 1.3764387630615817, - "grad_norm": 4.554339408874512, - "learning_rate": 2.7059353948973635e-05, - "loss": 0.4478, - "step": 155700 - }, - { - "epoch": 1.3765271663218939, - "grad_norm": 3.73746919631958, - "learning_rate": 2.7057880561301767e-05, - "loss": 0.656, - "step": 155710 - }, - { - "epoch": 1.3766155695822062, - "grad_norm": 2.260776996612549, - "learning_rate": 2.70564071736299e-05, - "loss": 0.4898, - "step": 155720 - }, - { - "epoch": 1.3767039728425186, - "grad_norm": 3.22761869430542, - "learning_rate": 2.7054933785958024e-05, - "loss": 0.5594, - "step": 155730 - }, - { - "epoch": 1.3767923761028307, - "grad_norm": 1.7947300672531128, - "learning_rate": 2.7053460398286156e-05, - "loss": 0.7092, - "step": 155740 - }, - { - "epoch": 1.3768807793631428, - "grad_norm": 1.503580093383789, - "learning_rate": 2.7051987010614287e-05, - "loss": 0.6421, - "step": 155750 - }, - { - "epoch": 1.3769691826234551, - "grad_norm": 2.2446248531341553, - "learning_rate": 2.7050513622942412e-05, - "loss": 0.5834, - "step": 155760 - }, - { - "epoch": 1.3770575858837675, - "grad_norm": 2.4247617721557617, - "learning_rate": 2.7049040235270544e-05, - "loss": 0.556, - "step": 155770 - }, - { - "epoch": 1.3771459891440796, - "grad_norm": 2.387530565261841, - "learning_rate": 2.7047566847598676e-05, - "loss": 0.617, - "step": 155780 - }, - { - "epoch": 1.377234392404392, - "grad_norm": 2.995638132095337, - "learning_rate": 2.70460934599268e-05, - "loss": 0.6097, - "step": 155790 - }, - { - "epoch": 1.377322795664704, - "grad_norm": 3.863736867904663, - "learning_rate": 2.7044620072254933e-05, - "loss": 0.6089, - "step": 155800 - }, - { - "epoch": 1.3774111989250164, - "grad_norm": 1.823962926864624, - "learning_rate": 2.704314668458306e-05, - "loss": 0.7074, - "step": 155810 - }, - { - "epoch": 1.3774996021853285, - "grad_norm": 1.502862811088562, - "learning_rate": 2.704167329691119e-05, - "loss": 0.5894, - "step": 155820 - }, - { - "epoch": 1.3775880054456409, - "grad_norm": 3.4480679035186768, - "learning_rate": 2.704019990923932e-05, - "loss": 0.6318, - "step": 155830 - }, - { - "epoch": 1.3776764087059532, - "grad_norm": 1.622433066368103, - "learning_rate": 2.703872652156745e-05, - "loss": 0.6106, - "step": 155840 - }, - { - "epoch": 1.3777648119662653, - "grad_norm": 1.1865192651748657, - "learning_rate": 2.7037253133895578e-05, - "loss": 0.5732, - "step": 155850 - }, - { - "epoch": 1.3778532152265774, - "grad_norm": 3.9950077533721924, - "learning_rate": 2.703577974622371e-05, - "loss": 0.5774, - "step": 155860 - }, - { - "epoch": 1.3779416184868898, - "grad_norm": 1.5382553339004517, - "learning_rate": 2.7034306358551838e-05, - "loss": 0.6515, - "step": 155870 - }, - { - "epoch": 1.3780300217472021, - "grad_norm": 1.448900818824768, - "learning_rate": 2.7032832970879966e-05, - "loss": 0.5279, - "step": 155880 - }, - { - "epoch": 1.3781184250075142, - "grad_norm": 3.701958417892456, - "learning_rate": 2.7031359583208098e-05, - "loss": 0.6972, - "step": 155890 - }, - { - "epoch": 1.3782068282678266, - "grad_norm": 4.679746627807617, - "learning_rate": 2.7029886195536226e-05, - "loss": 0.5998, - "step": 155900 - }, - { - "epoch": 1.3782952315281387, - "grad_norm": 3.1549744606018066, - "learning_rate": 2.7028412807864355e-05, - "loss": 0.5547, - "step": 155910 - }, - { - "epoch": 1.378383634788451, - "grad_norm": 1.7143431901931763, - "learning_rate": 2.7026939420192483e-05, - "loss": 0.6025, - "step": 155920 - }, - { - "epoch": 1.3784720380487632, - "grad_norm": 1.9437109231948853, - "learning_rate": 2.7025466032520615e-05, - "loss": 0.6479, - "step": 155930 - }, - { - "epoch": 1.3785604413090755, - "grad_norm": 7.047440052032471, - "learning_rate": 2.7023992644848743e-05, - "loss": 0.54, - "step": 155940 - }, - { - "epoch": 1.3786488445693879, - "grad_norm": 3.4126367568969727, - "learning_rate": 2.702251925717687e-05, - "loss": 0.532, - "step": 155950 - }, - { - "epoch": 1.3787372478297, - "grad_norm": 3.4150843620300293, - "learning_rate": 2.7021045869505003e-05, - "loss": 0.4966, - "step": 155960 - }, - { - "epoch": 1.378825651090012, - "grad_norm": 2.4167957305908203, - "learning_rate": 2.701957248183313e-05, - "loss": 0.5523, - "step": 155970 - }, - { - "epoch": 1.3789140543503244, - "grad_norm": 3.489398241043091, - "learning_rate": 2.701809909416126e-05, - "loss": 0.6639, - "step": 155980 - }, - { - "epoch": 1.3790024576106368, - "grad_norm": 7.619278907775879, - "learning_rate": 2.701662570648939e-05, - "loss": 0.5892, - "step": 155990 - }, - { - "epoch": 1.379090860870949, - "grad_norm": 3.692145586013794, - "learning_rate": 2.701515231881752e-05, - "loss": 0.5844, - "step": 156000 - }, - { - "epoch": 1.3791792641312612, - "grad_norm": 1.9484286308288574, - "learning_rate": 2.7013678931145648e-05, - "loss": 0.6469, - "step": 156010 - }, - { - "epoch": 1.3792676673915734, - "grad_norm": 1.8545992374420166, - "learning_rate": 2.701220554347378e-05, - "loss": 0.6258, - "step": 156020 - }, - { - "epoch": 1.3793560706518857, - "grad_norm": 2.85139799118042, - "learning_rate": 2.701073215580191e-05, - "loss": 0.66, - "step": 156030 - }, - { - "epoch": 1.3794444739121978, - "grad_norm": 1.1166852712631226, - "learning_rate": 2.7009258768130037e-05, - "loss": 0.6271, - "step": 156040 - }, - { - "epoch": 1.3795328771725102, - "grad_norm": 2.727505683898926, - "learning_rate": 2.700778538045817e-05, - "loss": 0.6577, - "step": 156050 - }, - { - "epoch": 1.3796212804328223, - "grad_norm": 1.5364007949829102, - "learning_rate": 2.7006311992786293e-05, - "loss": 0.4364, - "step": 156060 - }, - { - "epoch": 1.3797096836931346, - "grad_norm": 4.598865032196045, - "learning_rate": 2.7004838605114425e-05, - "loss": 0.4635, - "step": 156070 - }, - { - "epoch": 1.3797980869534467, - "grad_norm": 6.509904861450195, - "learning_rate": 2.7003365217442557e-05, - "loss": 0.5533, - "step": 156080 - }, - { - "epoch": 1.379886490213759, - "grad_norm": 1.072874903678894, - "learning_rate": 2.7001891829770682e-05, - "loss": 0.6263, - "step": 156090 - }, - { - "epoch": 1.3799748934740714, - "grad_norm": 19.482872009277344, - "learning_rate": 2.7000418442098814e-05, - "loss": 0.5289, - "step": 156100 - }, - { - "epoch": 1.3800632967343835, - "grad_norm": 25.272069931030273, - "learning_rate": 2.6998945054426945e-05, - "loss": 0.6261, - "step": 156110 - }, - { - "epoch": 1.3801516999946957, - "grad_norm": 3.5382237434387207, - "learning_rate": 2.699747166675507e-05, - "loss": 0.6623, - "step": 156120 - }, - { - "epoch": 1.380240103255008, - "grad_norm": 6.000899791717529, - "learning_rate": 2.6995998279083202e-05, - "loss": 0.88, - "step": 156130 - }, - { - "epoch": 1.3803285065153204, - "grad_norm": 9.717784881591797, - "learning_rate": 2.6994524891411334e-05, - "loss": 0.667, - "step": 156140 - }, - { - "epoch": 1.3804169097756325, - "grad_norm": 6.183516502380371, - "learning_rate": 2.699305150373946e-05, - "loss": 0.6779, - "step": 156150 - }, - { - "epoch": 1.3805053130359448, - "grad_norm": 2.6247222423553467, - "learning_rate": 2.699157811606759e-05, - "loss": 0.619, - "step": 156160 - }, - { - "epoch": 1.380593716296257, - "grad_norm": 8.288039207458496, - "learning_rate": 2.6990104728395716e-05, - "loss": 0.7186, - "step": 156170 - }, - { - "epoch": 1.3806821195565693, - "grad_norm": 9.10761547088623, - "learning_rate": 2.6988631340723847e-05, - "loss": 0.732, - "step": 156180 - }, - { - "epoch": 1.3807705228168814, - "grad_norm": 7.843234539031982, - "learning_rate": 2.698715795305198e-05, - "loss": 0.6819, - "step": 156190 - }, - { - "epoch": 1.3808589260771937, - "grad_norm": 2.5948920249938965, - "learning_rate": 2.6985684565380104e-05, - "loss": 0.5545, - "step": 156200 - }, - { - "epoch": 1.380947329337506, - "grad_norm": 5.79525899887085, - "learning_rate": 2.6984211177708236e-05, - "loss": 0.6209, - "step": 156210 - }, - { - "epoch": 1.3810357325978182, - "grad_norm": 11.15489387512207, - "learning_rate": 2.6982737790036367e-05, - "loss": 0.7334, - "step": 156220 - }, - { - "epoch": 1.3811241358581303, - "grad_norm": 4.876621246337891, - "learning_rate": 2.6981264402364492e-05, - "loss": 0.593, - "step": 156230 - }, - { - "epoch": 1.3812125391184427, - "grad_norm": 1.5784831047058105, - "learning_rate": 2.6979791014692624e-05, - "loss": 0.5793, - "step": 156240 - }, - { - "epoch": 1.381300942378755, - "grad_norm": 2.1730284690856934, - "learning_rate": 2.6978317627020756e-05, - "loss": 0.5601, - "step": 156250 - }, - { - "epoch": 1.3813893456390671, - "grad_norm": 2.341017961502075, - "learning_rate": 2.697684423934888e-05, - "loss": 0.677, - "step": 156260 - }, - { - "epoch": 1.3814777488993795, - "grad_norm": 4.492663860321045, - "learning_rate": 2.6975370851677013e-05, - "loss": 0.62, - "step": 156270 - }, - { - "epoch": 1.3815661521596916, - "grad_norm": 2.9659740924835205, - "learning_rate": 2.6973897464005138e-05, - "loss": 0.6348, - "step": 156280 - }, - { - "epoch": 1.381654555420004, - "grad_norm": 4.358122825622559, - "learning_rate": 2.697242407633327e-05, - "loss": 0.7985, - "step": 156290 - }, - { - "epoch": 1.381742958680316, - "grad_norm": 1.2129637002944946, - "learning_rate": 2.69709506886614e-05, - "loss": 0.5281, - "step": 156300 - }, - { - "epoch": 1.3818313619406284, - "grad_norm": 2.309075117111206, - "learning_rate": 2.6969477300989526e-05, - "loss": 0.4839, - "step": 156310 - }, - { - "epoch": 1.3819197652009407, - "grad_norm": 1.069314956665039, - "learning_rate": 2.6968003913317658e-05, - "loss": 0.6128, - "step": 156320 - }, - { - "epoch": 1.3820081684612529, - "grad_norm": 4.080246448516846, - "learning_rate": 2.696653052564579e-05, - "loss": 0.7097, - "step": 156330 - }, - { - "epoch": 1.382096571721565, - "grad_norm": 3.4198551177978516, - "learning_rate": 2.6965057137973914e-05, - "loss": 0.6586, - "step": 156340 - }, - { - "epoch": 1.3821849749818773, - "grad_norm": 1.1350969076156616, - "learning_rate": 2.6963583750302046e-05, - "loss": 0.5668, - "step": 156350 - }, - { - "epoch": 1.3822733782421897, - "grad_norm": 1.9831739664077759, - "learning_rate": 2.6962110362630178e-05, - "loss": 0.4136, - "step": 156360 - }, - { - "epoch": 1.3823617815025018, - "grad_norm": 1.7095671892166138, - "learning_rate": 2.6960636974958303e-05, - "loss": 0.6271, - "step": 156370 - }, - { - "epoch": 1.3824501847628141, - "grad_norm": 5.296531677246094, - "learning_rate": 2.6959163587286435e-05, - "loss": 0.7692, - "step": 156380 - }, - { - "epoch": 1.3825385880231262, - "grad_norm": 4.982787132263184, - "learning_rate": 2.695769019961456e-05, - "loss": 0.4727, - "step": 156390 - }, - { - "epoch": 1.3826269912834386, - "grad_norm": 2.1806366443634033, - "learning_rate": 2.695621681194269e-05, - "loss": 0.5999, - "step": 156400 - }, - { - "epoch": 1.3827153945437507, - "grad_norm": 1.8754287958145142, - "learning_rate": 2.6954743424270823e-05, - "loss": 0.5037, - "step": 156410 - }, - { - "epoch": 1.382803797804063, - "grad_norm": 1.323127269744873, - "learning_rate": 2.6953270036598948e-05, - "loss": 0.5753, - "step": 156420 - }, - { - "epoch": 1.3828922010643754, - "grad_norm": 5.687112808227539, - "learning_rate": 2.695179664892708e-05, - "loss": 0.6044, - "step": 156430 - }, - { - "epoch": 1.3829806043246875, - "grad_norm": 4.270662307739258, - "learning_rate": 2.695032326125521e-05, - "loss": 0.5245, - "step": 156440 - }, - { - "epoch": 1.3830690075849996, - "grad_norm": 4.5958147048950195, - "learning_rate": 2.6948849873583337e-05, - "loss": 0.6225, - "step": 156450 - }, - { - "epoch": 1.383157410845312, - "grad_norm": 4.541618347167969, - "learning_rate": 2.6947376485911468e-05, - "loss": 0.5054, - "step": 156460 - }, - { - "epoch": 1.3832458141056243, - "grad_norm": 4.363905429840088, - "learning_rate": 2.69459030982396e-05, - "loss": 0.5859, - "step": 156470 - }, - { - "epoch": 1.3833342173659364, - "grad_norm": 2.2598202228546143, - "learning_rate": 2.6944429710567725e-05, - "loss": 0.6096, - "step": 156480 - }, - { - "epoch": 1.3834226206262488, - "grad_norm": 1.849174976348877, - "learning_rate": 2.6942956322895857e-05, - "loss": 0.581, - "step": 156490 - }, - { - "epoch": 1.3835110238865609, - "grad_norm": 1.1192522048950195, - "learning_rate": 2.694148293522399e-05, - "loss": 0.5989, - "step": 156500 - }, - { - "epoch": 1.3835994271468732, - "grad_norm": 2.742314100265503, - "learning_rate": 2.6940009547552113e-05, - "loss": 0.584, - "step": 156510 - }, - { - "epoch": 1.3836878304071853, - "grad_norm": 4.095992565155029, - "learning_rate": 2.6938536159880245e-05, - "loss": 0.6587, - "step": 156520 - }, - { - "epoch": 1.3837762336674977, - "grad_norm": 2.7218616008758545, - "learning_rate": 2.693706277220837e-05, - "loss": 0.6269, - "step": 156530 - }, - { - "epoch": 1.38386463692781, - "grad_norm": 5.992404460906982, - "learning_rate": 2.6935589384536502e-05, - "loss": 0.6211, - "step": 156540 - }, - { - "epoch": 1.3839530401881222, - "grad_norm": 1.512061357498169, - "learning_rate": 2.6934115996864634e-05, - "loss": 0.6085, - "step": 156550 - }, - { - "epoch": 1.3840414434484343, - "grad_norm": 3.023223876953125, - "learning_rate": 2.693264260919276e-05, - "loss": 0.7619, - "step": 156560 - }, - { - "epoch": 1.3841298467087466, - "grad_norm": 4.649730205535889, - "learning_rate": 2.693116922152089e-05, - "loss": 0.5324, - "step": 156570 - }, - { - "epoch": 1.384218249969059, - "grad_norm": 2.2508370876312256, - "learning_rate": 2.6929695833849022e-05, - "loss": 0.6693, - "step": 156580 - }, - { - "epoch": 1.384306653229371, - "grad_norm": 3.125150680541992, - "learning_rate": 2.6928222446177147e-05, - "loss": 0.7694, - "step": 156590 - }, - { - "epoch": 1.3843950564896834, - "grad_norm": 3.8792030811309814, - "learning_rate": 2.692674905850528e-05, - "loss": 0.4879, - "step": 156600 - }, - { - "epoch": 1.3844834597499955, - "grad_norm": 5.308789253234863, - "learning_rate": 2.692527567083341e-05, - "loss": 0.6282, - "step": 156610 - }, - { - "epoch": 1.3845718630103079, - "grad_norm": 0.6319215297698975, - "learning_rate": 2.6923802283161535e-05, - "loss": 0.6267, - "step": 156620 - }, - { - "epoch": 1.38466026627062, - "grad_norm": 4.450835704803467, - "learning_rate": 2.6922328895489667e-05, - "loss": 0.674, - "step": 156630 - }, - { - "epoch": 1.3847486695309323, - "grad_norm": 6.0222978591918945, - "learning_rate": 2.6920855507817792e-05, - "loss": 0.6524, - "step": 156640 - }, - { - "epoch": 1.3848370727912445, - "grad_norm": 1.9837299585342407, - "learning_rate": 2.6919382120145924e-05, - "loss": 0.6489, - "step": 156650 - }, - { - "epoch": 1.3849254760515568, - "grad_norm": 5.43040657043457, - "learning_rate": 2.6917908732474056e-05, - "loss": 0.6283, - "step": 156660 - }, - { - "epoch": 1.385013879311869, - "grad_norm": 3.1336097717285156, - "learning_rate": 2.691643534480218e-05, - "loss": 0.4765, - "step": 156670 - }, - { - "epoch": 1.3851022825721813, - "grad_norm": 1.0707249641418457, - "learning_rate": 2.6914961957130312e-05, - "loss": 0.5894, - "step": 156680 - }, - { - "epoch": 1.3851906858324936, - "grad_norm": 10.350285530090332, - "learning_rate": 2.6913488569458444e-05, - "loss": 0.5456, - "step": 156690 - }, - { - "epoch": 1.3852790890928057, - "grad_norm": 2.535919427871704, - "learning_rate": 2.691201518178657e-05, - "loss": 0.4919, - "step": 156700 - }, - { - "epoch": 1.3853674923531178, - "grad_norm": 2.2882843017578125, - "learning_rate": 2.69105417941147e-05, - "loss": 0.638, - "step": 156710 - }, - { - "epoch": 1.3854558956134302, - "grad_norm": 9.162206649780273, - "learning_rate": 2.6909068406442833e-05, - "loss": 0.5082, - "step": 156720 - }, - { - "epoch": 1.3855442988737425, - "grad_norm": 2.086186170578003, - "learning_rate": 2.6907595018770958e-05, - "loss": 0.5452, - "step": 156730 - }, - { - "epoch": 1.3856327021340547, - "grad_norm": 2.1356472969055176, - "learning_rate": 2.690612163109909e-05, - "loss": 0.6348, - "step": 156740 - }, - { - "epoch": 1.385721105394367, - "grad_norm": 5.605736255645752, - "learning_rate": 2.6904648243427218e-05, - "loss": 0.5824, - "step": 156750 - }, - { - "epoch": 1.3858095086546791, - "grad_norm": 8.289831161499023, - "learning_rate": 2.6903174855755346e-05, - "loss": 0.6705, - "step": 156760 - }, - { - "epoch": 1.3858979119149915, - "grad_norm": 1.900573492050171, - "learning_rate": 2.6901701468083478e-05, - "loss": 0.4792, - "step": 156770 - }, - { - "epoch": 1.3859863151753036, - "grad_norm": 0.6380146145820618, - "learning_rate": 2.6900228080411606e-05, - "loss": 0.5732, - "step": 156780 - }, - { - "epoch": 1.386074718435616, - "grad_norm": 1.428074836730957, - "learning_rate": 2.6898754692739734e-05, - "loss": 0.7172, - "step": 156790 - }, - { - "epoch": 1.3861631216959283, - "grad_norm": 1.7874327898025513, - "learning_rate": 2.6897281305067866e-05, - "loss": 0.5252, - "step": 156800 - }, - { - "epoch": 1.3862515249562404, - "grad_norm": 2.0756053924560547, - "learning_rate": 2.6895807917395995e-05, - "loss": 0.5948, - "step": 156810 - }, - { - "epoch": 1.3863399282165525, - "grad_norm": 1.0823822021484375, - "learning_rate": 2.6894334529724123e-05, - "loss": 0.624, - "step": 156820 - }, - { - "epoch": 1.3864283314768648, - "grad_norm": 1.5016306638717651, - "learning_rate": 2.6892861142052255e-05, - "loss": 0.5785, - "step": 156830 - }, - { - "epoch": 1.3865167347371772, - "grad_norm": 2.820362091064453, - "learning_rate": 2.6891387754380383e-05, - "loss": 0.6595, - "step": 156840 - }, - { - "epoch": 1.3866051379974893, - "grad_norm": 1.7089853286743164, - "learning_rate": 2.688991436670851e-05, - "loss": 0.6851, - "step": 156850 - }, - { - "epoch": 1.3866935412578016, - "grad_norm": 2.8538742065429688, - "learning_rate": 2.688844097903664e-05, - "loss": 0.5761, - "step": 156860 - }, - { - "epoch": 1.3867819445181138, - "grad_norm": 1.8295609951019287, - "learning_rate": 2.688696759136477e-05, - "loss": 0.5768, - "step": 156870 - }, - { - "epoch": 1.386870347778426, - "grad_norm": 11.0761079788208, - "learning_rate": 2.68854942036929e-05, - "loss": 0.5359, - "step": 156880 - }, - { - "epoch": 1.3869587510387382, - "grad_norm": 3.313791513442993, - "learning_rate": 2.6884020816021028e-05, - "loss": 0.5396, - "step": 156890 - }, - { - "epoch": 1.3870471542990506, - "grad_norm": 1.593228816986084, - "learning_rate": 2.688254742834916e-05, - "loss": 0.5603, - "step": 156900 - }, - { - "epoch": 1.387135557559363, - "grad_norm": 6.165848255157471, - "learning_rate": 2.6881074040677288e-05, - "loss": 0.6847, - "step": 156910 - }, - { - "epoch": 1.387223960819675, - "grad_norm": 3.118553876876831, - "learning_rate": 2.6879600653005417e-05, - "loss": 0.5191, - "step": 156920 - }, - { - "epoch": 1.3873123640799871, - "grad_norm": 10.501680374145508, - "learning_rate": 2.687812726533355e-05, - "loss": 0.6277, - "step": 156930 - }, - { - "epoch": 1.3874007673402995, - "grad_norm": 1.5400967597961426, - "learning_rate": 2.6876653877661677e-05, - "loss": 0.5237, - "step": 156940 - }, - { - "epoch": 1.3874891706006118, - "grad_norm": 6.471516132354736, - "learning_rate": 2.6875180489989805e-05, - "loss": 0.5892, - "step": 156950 - }, - { - "epoch": 1.387577573860924, - "grad_norm": 10.897212982177734, - "learning_rate": 2.6873707102317937e-05, - "loss": 0.5339, - "step": 156960 - }, - { - "epoch": 1.3876659771212363, - "grad_norm": 2.800159454345703, - "learning_rate": 2.6872233714646065e-05, - "loss": 0.5743, - "step": 156970 - }, - { - "epoch": 1.3877543803815484, - "grad_norm": 1.014312744140625, - "learning_rate": 2.6870760326974194e-05, - "loss": 0.6186, - "step": 156980 - }, - { - "epoch": 1.3878427836418608, - "grad_norm": 2.929683208465576, - "learning_rate": 2.6869286939302325e-05, - "loss": 0.6056, - "step": 156990 - }, - { - "epoch": 1.3879311869021729, - "grad_norm": 4.7371392250061035, - "learning_rate": 2.686781355163045e-05, - "loss": 0.6531, - "step": 157000 - }, - { - "epoch": 1.3880195901624852, - "grad_norm": 6.103567123413086, - "learning_rate": 2.6866340163958582e-05, - "loss": 0.6201, - "step": 157010 - }, - { - "epoch": 1.3881079934227976, - "grad_norm": 1.3877007961273193, - "learning_rate": 2.6864866776286714e-05, - "loss": 0.5622, - "step": 157020 - }, - { - "epoch": 1.3881963966831097, - "grad_norm": 1.0094410181045532, - "learning_rate": 2.686339338861484e-05, - "loss": 0.5699, - "step": 157030 - }, - { - "epoch": 1.3882847999434218, - "grad_norm": 5.75368070602417, - "learning_rate": 2.686192000094297e-05, - "loss": 0.6289, - "step": 157040 - }, - { - "epoch": 1.3883732032037341, - "grad_norm": 6.306117534637451, - "learning_rate": 2.6860446613271102e-05, - "loss": 0.7218, - "step": 157050 - }, - { - "epoch": 1.3884616064640465, - "grad_norm": 1.304063320159912, - "learning_rate": 2.6858973225599227e-05, - "loss": 0.7404, - "step": 157060 - }, - { - "epoch": 1.3885500097243586, - "grad_norm": 0.9833344221115112, - "learning_rate": 2.685749983792736e-05, - "loss": 0.5628, - "step": 157070 - }, - { - "epoch": 1.388638412984671, - "grad_norm": 1.5215866565704346, - "learning_rate": 2.685602645025549e-05, - "loss": 0.6346, - "step": 157080 - }, - { - "epoch": 1.388726816244983, - "grad_norm": 2.5768070220947266, - "learning_rate": 2.6854553062583616e-05, - "loss": 0.6523, - "step": 157090 - }, - { - "epoch": 1.3888152195052954, - "grad_norm": 2.755969285964966, - "learning_rate": 2.6853079674911747e-05, - "loss": 0.625, - "step": 157100 - }, - { - "epoch": 1.3889036227656075, - "grad_norm": 2.2574679851531982, - "learning_rate": 2.6851606287239872e-05, - "loss": 0.5981, - "step": 157110 - }, - { - "epoch": 1.3889920260259199, - "grad_norm": 1.1929677724838257, - "learning_rate": 2.6850132899568004e-05, - "loss": 0.5248, - "step": 157120 - }, - { - "epoch": 1.3890804292862322, - "grad_norm": 2.1432747840881348, - "learning_rate": 2.6848659511896136e-05, - "loss": 0.6067, - "step": 157130 - }, - { - "epoch": 1.3891688325465443, - "grad_norm": 2.543018102645874, - "learning_rate": 2.684718612422426e-05, - "loss": 0.6012, - "step": 157140 - }, - { - "epoch": 1.3892572358068564, - "grad_norm": 0.7401247620582581, - "learning_rate": 2.6845712736552392e-05, - "loss": 0.5428, - "step": 157150 - }, - { - "epoch": 1.3893456390671688, - "grad_norm": 1.8546061515808105, - "learning_rate": 2.6844239348880524e-05, - "loss": 0.688, - "step": 157160 - }, - { - "epoch": 1.3894340423274811, - "grad_norm": 1.0606430768966675, - "learning_rate": 2.684276596120865e-05, - "loss": 0.6133, - "step": 157170 - }, - { - "epoch": 1.3895224455877933, - "grad_norm": 1.0503008365631104, - "learning_rate": 2.684129257353678e-05, - "loss": 0.5446, - "step": 157180 - }, - { - "epoch": 1.3896108488481056, - "grad_norm": 1.493780493736267, - "learning_rate": 2.6839819185864913e-05, - "loss": 0.5217, - "step": 157190 - }, - { - "epoch": 1.3896992521084177, - "grad_norm": 2.809370994567871, - "learning_rate": 2.6838345798193038e-05, - "loss": 0.5819, - "step": 157200 - }, - { - "epoch": 1.38978765536873, - "grad_norm": 11.472502708435059, - "learning_rate": 2.683687241052117e-05, - "loss": 0.5705, - "step": 157210 - }, - { - "epoch": 1.3898760586290422, - "grad_norm": 3.870170831680298, - "learning_rate": 2.6835399022849294e-05, - "loss": 0.6996, - "step": 157220 - }, - { - "epoch": 1.3899644618893545, - "grad_norm": 7.817115783691406, - "learning_rate": 2.6833925635177426e-05, - "loss": 0.4974, - "step": 157230 - }, - { - "epoch": 1.3900528651496666, - "grad_norm": 30.183246612548828, - "learning_rate": 2.6832452247505558e-05, - "loss": 0.5176, - "step": 157240 - }, - { - "epoch": 1.390141268409979, - "grad_norm": 2.621539831161499, - "learning_rate": 2.6830978859833683e-05, - "loss": 0.6329, - "step": 157250 - }, - { - "epoch": 1.390229671670291, - "grad_norm": 5.485243320465088, - "learning_rate": 2.6829505472161815e-05, - "loss": 0.5713, - "step": 157260 - }, - { - "epoch": 1.3903180749306034, - "grad_norm": 1.9731295108795166, - "learning_rate": 2.6828032084489946e-05, - "loss": 0.6822, - "step": 157270 - }, - { - "epoch": 1.3904064781909158, - "grad_norm": 4.251471042633057, - "learning_rate": 2.682655869681807e-05, - "loss": 0.6475, - "step": 157280 - }, - { - "epoch": 1.390494881451228, - "grad_norm": 3.7204394340515137, - "learning_rate": 2.6825085309146203e-05, - "loss": 0.6763, - "step": 157290 - }, - { - "epoch": 1.39058328471154, - "grad_norm": 2.8035309314727783, - "learning_rate": 2.6823611921474335e-05, - "loss": 0.5201, - "step": 157300 - }, - { - "epoch": 1.3906716879718524, - "grad_norm": 2.1704540252685547, - "learning_rate": 2.682213853380246e-05, - "loss": 0.6815, - "step": 157310 - }, - { - "epoch": 1.3907600912321647, - "grad_norm": 2.329289197921753, - "learning_rate": 2.682066514613059e-05, - "loss": 0.6119, - "step": 157320 - }, - { - "epoch": 1.3908484944924768, - "grad_norm": 11.108922004699707, - "learning_rate": 2.6819191758458723e-05, - "loss": 0.5553, - "step": 157330 - }, - { - "epoch": 1.3909368977527892, - "grad_norm": 16.72702980041504, - "learning_rate": 2.6817718370786848e-05, - "loss": 0.4598, - "step": 157340 - }, - { - "epoch": 1.3910253010131013, - "grad_norm": 2.5221312046051025, - "learning_rate": 2.681624498311498e-05, - "loss": 0.6026, - "step": 157350 - }, - { - "epoch": 1.3911137042734136, - "grad_norm": 2.9180703163146973, - "learning_rate": 2.6814771595443105e-05, - "loss": 0.5419, - "step": 157360 - }, - { - "epoch": 1.3912021075337258, - "grad_norm": 0.9406756162643433, - "learning_rate": 2.6813298207771237e-05, - "loss": 0.5041, - "step": 157370 - }, - { - "epoch": 1.391290510794038, - "grad_norm": 1.0423914194107056, - "learning_rate": 2.681182482009937e-05, - "loss": 0.5697, - "step": 157380 - }, - { - "epoch": 1.3913789140543504, - "grad_norm": 1.1540026664733887, - "learning_rate": 2.6810351432427493e-05, - "loss": 0.5804, - "step": 157390 - }, - { - "epoch": 1.3914673173146626, - "grad_norm": 5.698990345001221, - "learning_rate": 2.6808878044755625e-05, - "loss": 0.5871, - "step": 157400 - }, - { - "epoch": 1.3915557205749747, - "grad_norm": 1.7291351556777954, - "learning_rate": 2.6807404657083757e-05, - "loss": 0.6036, - "step": 157410 - }, - { - "epoch": 1.391644123835287, - "grad_norm": 1.8603672981262207, - "learning_rate": 2.6805931269411882e-05, - "loss": 0.7682, - "step": 157420 - }, - { - "epoch": 1.3917325270955994, - "grad_norm": 2.7575790882110596, - "learning_rate": 2.6804457881740013e-05, - "loss": 0.633, - "step": 157430 - }, - { - "epoch": 1.3918209303559115, - "grad_norm": 5.373757839202881, - "learning_rate": 2.6802984494068145e-05, - "loss": 0.7215, - "step": 157440 - }, - { - "epoch": 1.3919093336162238, - "grad_norm": 1.2322953939437866, - "learning_rate": 2.680151110639627e-05, - "loss": 0.5785, - "step": 157450 - }, - { - "epoch": 1.391997736876536, - "grad_norm": 3.9250879287719727, - "learning_rate": 2.6800037718724402e-05, - "loss": 0.5582, - "step": 157460 - }, - { - "epoch": 1.3920861401368483, - "grad_norm": 2.4116668701171875, - "learning_rate": 2.6798564331052527e-05, - "loss": 0.6289, - "step": 157470 - }, - { - "epoch": 1.3921745433971604, - "grad_norm": 1.380910873413086, - "learning_rate": 2.679709094338066e-05, - "loss": 0.5894, - "step": 157480 - }, - { - "epoch": 1.3922629466574727, - "grad_norm": 1.8472890853881836, - "learning_rate": 2.679561755570879e-05, - "loss": 0.5763, - "step": 157490 - }, - { - "epoch": 1.392351349917785, - "grad_norm": 1.5728129148483276, - "learning_rate": 2.6794144168036915e-05, - "loss": 0.535, - "step": 157500 - }, - { - "epoch": 1.3924397531780972, - "grad_norm": 1.559787631034851, - "learning_rate": 2.6792670780365047e-05, - "loss": 0.5391, - "step": 157510 - }, - { - "epoch": 1.3925281564384093, - "grad_norm": 2.645859479904175, - "learning_rate": 2.679119739269318e-05, - "loss": 0.5894, - "step": 157520 - }, - { - "epoch": 1.3926165596987217, - "grad_norm": 17.005584716796875, - "learning_rate": 2.6789724005021304e-05, - "loss": 0.6811, - "step": 157530 - }, - { - "epoch": 1.392704962959034, - "grad_norm": 2.3037030696868896, - "learning_rate": 2.6788250617349436e-05, - "loss": 0.7113, - "step": 157540 - }, - { - "epoch": 1.3927933662193461, - "grad_norm": 6.945370674133301, - "learning_rate": 2.6786777229677567e-05, - "loss": 0.5779, - "step": 157550 - }, - { - "epoch": 1.3928817694796585, - "grad_norm": 2.1154115200042725, - "learning_rate": 2.6785303842005692e-05, - "loss": 0.6004, - "step": 157560 - }, - { - "epoch": 1.3929701727399706, - "grad_norm": 1.3923304080963135, - "learning_rate": 2.6783830454333824e-05, - "loss": 0.6148, - "step": 157570 - }, - { - "epoch": 1.393058576000283, - "grad_norm": 5.411924839019775, - "learning_rate": 2.678235706666195e-05, - "loss": 0.5789, - "step": 157580 - }, - { - "epoch": 1.393146979260595, - "grad_norm": 3.663203001022339, - "learning_rate": 2.678088367899008e-05, - "loss": 0.5842, - "step": 157590 - }, - { - "epoch": 1.3932353825209074, - "grad_norm": 1.2214984893798828, - "learning_rate": 2.6779410291318212e-05, - "loss": 0.636, - "step": 157600 - }, - { - "epoch": 1.3933237857812197, - "grad_norm": 2.6077592372894287, - "learning_rate": 2.6777936903646337e-05, - "loss": 0.6277, - "step": 157610 - }, - { - "epoch": 1.3934121890415319, - "grad_norm": 1.0479772090911865, - "learning_rate": 2.677646351597447e-05, - "loss": 0.6629, - "step": 157620 - }, - { - "epoch": 1.393500592301844, - "grad_norm": 9.980446815490723, - "learning_rate": 2.67749901283026e-05, - "loss": 0.574, - "step": 157630 - }, - { - "epoch": 1.3935889955621563, - "grad_norm": 11.375358581542969, - "learning_rate": 2.6773516740630726e-05, - "loss": 0.5764, - "step": 157640 - }, - { - "epoch": 1.3936773988224687, - "grad_norm": 1.025200605392456, - "learning_rate": 2.6772043352958858e-05, - "loss": 0.6117, - "step": 157650 - }, - { - "epoch": 1.3937658020827808, - "grad_norm": 1.7183079719543457, - "learning_rate": 2.677056996528699e-05, - "loss": 0.5069, - "step": 157660 - }, - { - "epoch": 1.3938542053430931, - "grad_norm": 1.4647001028060913, - "learning_rate": 2.6769096577615114e-05, - "loss": 0.6947, - "step": 157670 - }, - { - "epoch": 1.3939426086034052, - "grad_norm": 1.9194972515106201, - "learning_rate": 2.6767623189943246e-05, - "loss": 0.5888, - "step": 157680 - }, - { - "epoch": 1.3940310118637176, - "grad_norm": 5.285514831542969, - "learning_rate": 2.6766149802271374e-05, - "loss": 0.661, - "step": 157690 - }, - { - "epoch": 1.3941194151240297, - "grad_norm": 2.15682053565979, - "learning_rate": 2.6764676414599503e-05, - "loss": 0.6467, - "step": 157700 - }, - { - "epoch": 1.394207818384342, - "grad_norm": 2.0305368900299072, - "learning_rate": 2.6763203026927634e-05, - "loss": 0.698, - "step": 157710 - }, - { - "epoch": 1.3942962216446544, - "grad_norm": 3.1504745483398438, - "learning_rate": 2.6761729639255763e-05, - "loss": 0.6174, - "step": 157720 - }, - { - "epoch": 1.3943846249049665, - "grad_norm": 3.881854295730591, - "learning_rate": 2.676025625158389e-05, - "loss": 0.6993, - "step": 157730 - }, - { - "epoch": 1.3944730281652786, - "grad_norm": 2.65567684173584, - "learning_rate": 2.6758782863912023e-05, - "loss": 0.6831, - "step": 157740 - }, - { - "epoch": 1.394561431425591, - "grad_norm": 1.3427464962005615, - "learning_rate": 2.675730947624015e-05, - "loss": 0.5069, - "step": 157750 - }, - { - "epoch": 1.3946498346859033, - "grad_norm": 18.119110107421875, - "learning_rate": 2.675583608856828e-05, - "loss": 0.685, - "step": 157760 - }, - { - "epoch": 1.3947382379462154, - "grad_norm": 0.9084869027137756, - "learning_rate": 2.675436270089641e-05, - "loss": 0.4774, - "step": 157770 - }, - { - "epoch": 1.3948266412065278, - "grad_norm": 3.3470420837402344, - "learning_rate": 2.675288931322454e-05, - "loss": 0.6149, - "step": 157780 - }, - { - "epoch": 1.39491504446684, - "grad_norm": 0.980299174785614, - "learning_rate": 2.6751415925552668e-05, - "loss": 0.5974, - "step": 157790 - }, - { - "epoch": 1.3950034477271522, - "grad_norm": 7.012294769287109, - "learning_rate": 2.67499425378808e-05, - "loss": 0.7202, - "step": 157800 - }, - { - "epoch": 1.3950918509874644, - "grad_norm": 1.5065248012542725, - "learning_rate": 2.6748469150208928e-05, - "loss": 0.5093, - "step": 157810 - }, - { - "epoch": 1.3951802542477767, - "grad_norm": 5.286609649658203, - "learning_rate": 2.6746995762537057e-05, - "loss": 0.705, - "step": 157820 - }, - { - "epoch": 1.3952686575080888, - "grad_norm": 2.169598340988159, - "learning_rate": 2.6745522374865185e-05, - "loss": 0.6473, - "step": 157830 - }, - { - "epoch": 1.3953570607684012, - "grad_norm": 14.844645500183105, - "learning_rate": 2.6744048987193317e-05, - "loss": 0.672, - "step": 157840 - }, - { - "epoch": 1.3954454640287133, - "grad_norm": 2.981980800628662, - "learning_rate": 2.6742575599521445e-05, - "loss": 0.4646, - "step": 157850 - }, - { - "epoch": 1.3955338672890256, - "grad_norm": 1.0732083320617676, - "learning_rate": 2.6741102211849573e-05, - "loss": 0.6207, - "step": 157860 - }, - { - "epoch": 1.395622270549338, - "grad_norm": 2.8117666244506836, - "learning_rate": 2.6739628824177705e-05, - "loss": 0.58, - "step": 157870 - }, - { - "epoch": 1.39571067380965, - "grad_norm": 2.195852518081665, - "learning_rate": 2.6738155436505833e-05, - "loss": 0.662, - "step": 157880 - }, - { - "epoch": 1.3957990770699622, - "grad_norm": 1.915737509727478, - "learning_rate": 2.6736682048833962e-05, - "loss": 0.5721, - "step": 157890 - }, - { - "epoch": 1.3958874803302745, - "grad_norm": 6.411036968231201, - "learning_rate": 2.6735208661162094e-05, - "loss": 0.7025, - "step": 157900 - }, - { - "epoch": 1.3959758835905869, - "grad_norm": 1.482609748840332, - "learning_rate": 2.6733735273490222e-05, - "loss": 0.5779, - "step": 157910 - }, - { - "epoch": 1.396064286850899, - "grad_norm": 3.666043996810913, - "learning_rate": 2.673226188581835e-05, - "loss": 0.7195, - "step": 157920 - }, - { - "epoch": 1.3961526901112113, - "grad_norm": 2.3935749530792236, - "learning_rate": 2.6730788498146482e-05, - "loss": 0.627, - "step": 157930 - }, - { - "epoch": 1.3962410933715235, - "grad_norm": 2.3432655334472656, - "learning_rate": 2.6729315110474607e-05, - "loss": 0.4989, - "step": 157940 - }, - { - "epoch": 1.3963294966318358, - "grad_norm": 2.711402177810669, - "learning_rate": 2.672784172280274e-05, - "loss": 0.5546, - "step": 157950 - }, - { - "epoch": 1.396417899892148, - "grad_norm": 2.282696485519409, - "learning_rate": 2.672636833513087e-05, - "loss": 0.5195, - "step": 157960 - }, - { - "epoch": 1.3965063031524603, - "grad_norm": 9.077699661254883, - "learning_rate": 2.6724894947458995e-05, - "loss": 0.5887, - "step": 157970 - }, - { - "epoch": 1.3965947064127726, - "grad_norm": 17.72617530822754, - "learning_rate": 2.6723421559787127e-05, - "loss": 0.5331, - "step": 157980 - }, - { - "epoch": 1.3966831096730847, - "grad_norm": 1.9646657705307007, - "learning_rate": 2.672194817211526e-05, - "loss": 0.5663, - "step": 157990 - }, - { - "epoch": 1.3967715129333969, - "grad_norm": 3.6485767364501953, - "learning_rate": 2.6720474784443384e-05, - "loss": 0.6359, - "step": 158000 - }, - { - "epoch": 1.3968599161937092, - "grad_norm": 4.137617588043213, - "learning_rate": 2.6719001396771516e-05, - "loss": 0.6042, - "step": 158010 - }, - { - "epoch": 1.3969483194540215, - "grad_norm": 2.2869791984558105, - "learning_rate": 2.6717528009099647e-05, - "loss": 0.5228, - "step": 158020 - }, - { - "epoch": 1.3970367227143337, - "grad_norm": 1.1500113010406494, - "learning_rate": 2.6716054621427772e-05, - "loss": 0.5813, - "step": 158030 - }, - { - "epoch": 1.397125125974646, - "grad_norm": 1.4167678356170654, - "learning_rate": 2.6714581233755904e-05, - "loss": 0.5669, - "step": 158040 - }, - { - "epoch": 1.3972135292349581, - "grad_norm": 7.787479877471924, - "learning_rate": 2.671310784608403e-05, - "loss": 0.6369, - "step": 158050 - }, - { - "epoch": 1.3973019324952705, - "grad_norm": 1.4218847751617432, - "learning_rate": 2.671163445841216e-05, - "loss": 0.4492, - "step": 158060 - }, - { - "epoch": 1.3973903357555826, - "grad_norm": 1.9995653629302979, - "learning_rate": 2.6710161070740293e-05, - "loss": 0.5917, - "step": 158070 - }, - { - "epoch": 1.397478739015895, - "grad_norm": 4.255588531494141, - "learning_rate": 2.6708687683068417e-05, - "loss": 0.7148, - "step": 158080 - }, - { - "epoch": 1.3975671422762073, - "grad_norm": 1.1951148509979248, - "learning_rate": 2.670721429539655e-05, - "loss": 0.5146, - "step": 158090 - }, - { - "epoch": 1.3976555455365194, - "grad_norm": 2.986920118331909, - "learning_rate": 2.670574090772468e-05, - "loss": 0.5559, - "step": 158100 - }, - { - "epoch": 1.3977439487968315, - "grad_norm": 7.708232879638672, - "learning_rate": 2.6704267520052806e-05, - "loss": 0.6666, - "step": 158110 - }, - { - "epoch": 1.3978323520571438, - "grad_norm": 1.7252209186553955, - "learning_rate": 2.6702794132380938e-05, - "loss": 0.5863, - "step": 158120 - }, - { - "epoch": 1.3979207553174562, - "grad_norm": 2.579143524169922, - "learning_rate": 2.670132074470907e-05, - "loss": 0.6263, - "step": 158130 - }, - { - "epoch": 1.3980091585777683, - "grad_norm": 2.1502201557159424, - "learning_rate": 2.6699847357037194e-05, - "loss": 0.5237, - "step": 158140 - }, - { - "epoch": 1.3980975618380806, - "grad_norm": 6.006604194641113, - "learning_rate": 2.6698373969365326e-05, - "loss": 0.6211, - "step": 158150 - }, - { - "epoch": 1.3981859650983928, - "grad_norm": 1.392369270324707, - "learning_rate": 2.669690058169345e-05, - "loss": 0.7232, - "step": 158160 - }, - { - "epoch": 1.398274368358705, - "grad_norm": 3.991595983505249, - "learning_rate": 2.6695427194021583e-05, - "loss": 0.7598, - "step": 158170 - }, - { - "epoch": 1.3983627716190172, - "grad_norm": 12.469320297241211, - "learning_rate": 2.6693953806349715e-05, - "loss": 0.5641, - "step": 158180 - }, - { - "epoch": 1.3984511748793296, - "grad_norm": 2.5988597869873047, - "learning_rate": 2.669248041867784e-05, - "loss": 0.5599, - "step": 158190 - }, - { - "epoch": 1.398539578139642, - "grad_norm": 2.2182259559631348, - "learning_rate": 2.669100703100597e-05, - "loss": 0.5853, - "step": 158200 - }, - { - "epoch": 1.398627981399954, - "grad_norm": 3.1300642490386963, - "learning_rate": 2.6689533643334103e-05, - "loss": 0.5051, - "step": 158210 - }, - { - "epoch": 1.3987163846602662, - "grad_norm": 1.35454523563385, - "learning_rate": 2.6688060255662228e-05, - "loss": 0.4375, - "step": 158220 - }, - { - "epoch": 1.3988047879205785, - "grad_norm": 1.7835090160369873, - "learning_rate": 2.668658686799036e-05, - "loss": 0.6597, - "step": 158230 - }, - { - "epoch": 1.3988931911808908, - "grad_norm": 5.0810346603393555, - "learning_rate": 2.668511348031849e-05, - "loss": 0.6704, - "step": 158240 - }, - { - "epoch": 1.398981594441203, - "grad_norm": 4.184632301330566, - "learning_rate": 2.6683640092646616e-05, - "loss": 0.4617, - "step": 158250 - }, - { - "epoch": 1.3990699977015153, - "grad_norm": 1.9390873908996582, - "learning_rate": 2.6682166704974748e-05, - "loss": 0.5741, - "step": 158260 - }, - { - "epoch": 1.3991584009618274, - "grad_norm": 1.558897852897644, - "learning_rate": 2.668069331730288e-05, - "loss": 0.7577, - "step": 158270 - }, - { - "epoch": 1.3992468042221398, - "grad_norm": 4.806708812713623, - "learning_rate": 2.6679219929631005e-05, - "loss": 0.5403, - "step": 158280 - }, - { - "epoch": 1.3993352074824519, - "grad_norm": 2.3022091388702393, - "learning_rate": 2.6677746541959137e-05, - "loss": 0.6078, - "step": 158290 - }, - { - "epoch": 1.3994236107427642, - "grad_norm": 1.2062190771102905, - "learning_rate": 2.667627315428726e-05, - "loss": 0.6788, - "step": 158300 - }, - { - "epoch": 1.3995120140030766, - "grad_norm": 4.970940589904785, - "learning_rate": 2.6674799766615393e-05, - "loss": 0.6319, - "step": 158310 - }, - { - "epoch": 1.3996004172633887, - "grad_norm": 6.246344566345215, - "learning_rate": 2.6673326378943525e-05, - "loss": 0.7757, - "step": 158320 - }, - { - "epoch": 1.3996888205237008, - "grad_norm": 1.4345197677612305, - "learning_rate": 2.667185299127165e-05, - "loss": 0.609, - "step": 158330 - }, - { - "epoch": 1.3997772237840131, - "grad_norm": 1.247517466545105, - "learning_rate": 2.6670379603599782e-05, - "loss": 0.6853, - "step": 158340 - }, - { - "epoch": 1.3998656270443255, - "grad_norm": 2.3947219848632812, - "learning_rate": 2.6668906215927914e-05, - "loss": 0.5954, - "step": 158350 - }, - { - "epoch": 1.3999540303046376, - "grad_norm": 6.077724456787109, - "learning_rate": 2.666743282825604e-05, - "loss": 0.6926, - "step": 158360 - }, - { - "epoch": 1.40004243356495, - "grad_norm": 3.187283515930176, - "learning_rate": 2.666595944058417e-05, - "loss": 0.739, - "step": 158370 - }, - { - "epoch": 1.400130836825262, - "grad_norm": 3.419934034347534, - "learning_rate": 2.6664486052912302e-05, - "loss": 0.659, - "step": 158380 - }, - { - "epoch": 1.4002192400855744, - "grad_norm": 2.3514151573181152, - "learning_rate": 2.6663012665240427e-05, - "loss": 0.6331, - "step": 158390 - }, - { - "epoch": 1.4003076433458865, - "grad_norm": 1.5123956203460693, - "learning_rate": 2.666153927756856e-05, - "loss": 0.6871, - "step": 158400 - }, - { - "epoch": 1.4003960466061989, - "grad_norm": 2.739198923110962, - "learning_rate": 2.6660065889896684e-05, - "loss": 0.5129, - "step": 158410 - }, - { - "epoch": 1.400484449866511, - "grad_norm": 1.086140513420105, - "learning_rate": 2.6658592502224815e-05, - "loss": 0.564, - "step": 158420 - }, - { - "epoch": 1.4005728531268233, - "grad_norm": 3.4808900356292725, - "learning_rate": 2.6657119114552947e-05, - "loss": 0.6658, - "step": 158430 - }, - { - "epoch": 1.4006612563871355, - "grad_norm": 4.097965717315674, - "learning_rate": 2.6655645726881072e-05, - "loss": 0.5703, - "step": 158440 - }, - { - "epoch": 1.4007496596474478, - "grad_norm": 4.910766124725342, - "learning_rate": 2.6654172339209204e-05, - "loss": 0.6817, - "step": 158450 - }, - { - "epoch": 1.4008380629077601, - "grad_norm": 3.675067663192749, - "learning_rate": 2.6652698951537336e-05, - "loss": 0.6133, - "step": 158460 - }, - { - "epoch": 1.4009264661680723, - "grad_norm": 1.8795480728149414, - "learning_rate": 2.665122556386546e-05, - "loss": 0.6082, - "step": 158470 - }, - { - "epoch": 1.4010148694283846, - "grad_norm": 2.6172196865081787, - "learning_rate": 2.6649752176193592e-05, - "loss": 0.5722, - "step": 158480 - }, - { - "epoch": 1.4011032726886967, - "grad_norm": 13.755043029785156, - "learning_rate": 2.6648278788521724e-05, - "loss": 0.6059, - "step": 158490 - }, - { - "epoch": 1.401191675949009, - "grad_norm": 2.8032913208007812, - "learning_rate": 2.664680540084985e-05, - "loss": 0.7376, - "step": 158500 - }, - { - "epoch": 1.4012800792093212, - "grad_norm": 4.964893341064453, - "learning_rate": 2.664533201317798e-05, - "loss": 0.6779, - "step": 158510 - }, - { - "epoch": 1.4013684824696335, - "grad_norm": 3.5689101219177246, - "learning_rate": 2.6643858625506106e-05, - "loss": 0.6091, - "step": 158520 - }, - { - "epoch": 1.4014568857299456, - "grad_norm": 3.090955972671509, - "learning_rate": 2.6642385237834237e-05, - "loss": 0.6149, - "step": 158530 - }, - { - "epoch": 1.401545288990258, - "grad_norm": 1.0367218255996704, - "learning_rate": 2.664091185016237e-05, - "loss": 0.5828, - "step": 158540 - }, - { - "epoch": 1.40163369225057, - "grad_norm": 8.77690601348877, - "learning_rate": 2.6639438462490494e-05, - "loss": 0.6477, - "step": 158550 - }, - { - "epoch": 1.4017220955108824, - "grad_norm": 3.570495367050171, - "learning_rate": 2.6637965074818626e-05, - "loss": 0.5514, - "step": 158560 - }, - { - "epoch": 1.4018104987711948, - "grad_norm": 1.879351258277893, - "learning_rate": 2.6636491687146758e-05, - "loss": 0.628, - "step": 158570 - }, - { - "epoch": 1.401898902031507, - "grad_norm": 2.3856704235076904, - "learning_rate": 2.6635018299474883e-05, - "loss": 0.5588, - "step": 158580 - }, - { - "epoch": 1.401987305291819, - "grad_norm": 4.7609076499938965, - "learning_rate": 2.6633544911803014e-05, - "loss": 0.5506, - "step": 158590 - }, - { - "epoch": 1.4020757085521314, - "grad_norm": 9.955957412719727, - "learning_rate": 2.6632071524131146e-05, - "loss": 0.5023, - "step": 158600 - }, - { - "epoch": 1.4021641118124437, - "grad_norm": 3.885927438735962, - "learning_rate": 2.663059813645927e-05, - "loss": 0.7053, - "step": 158610 - }, - { - "epoch": 1.4022525150727558, - "grad_norm": 2.1473939418792725, - "learning_rate": 2.6629124748787403e-05, - "loss": 0.6117, - "step": 158620 - }, - { - "epoch": 1.4023409183330682, - "grad_norm": 1.6334092617034912, - "learning_rate": 2.662765136111553e-05, - "loss": 0.77, - "step": 158630 - }, - { - "epoch": 1.4024293215933803, - "grad_norm": 4.547221660614014, - "learning_rate": 2.662617797344366e-05, - "loss": 0.7352, - "step": 158640 - }, - { - "epoch": 1.4025177248536926, - "grad_norm": 1.7922965288162231, - "learning_rate": 2.662470458577179e-05, - "loss": 0.6089, - "step": 158650 - }, - { - "epoch": 1.4026061281140048, - "grad_norm": 4.786975860595703, - "learning_rate": 2.662323119809992e-05, - "loss": 0.5484, - "step": 158660 - }, - { - "epoch": 1.402694531374317, - "grad_norm": 5.477907657623291, - "learning_rate": 2.6621757810428048e-05, - "loss": 0.6103, - "step": 158670 - }, - { - "epoch": 1.4027829346346294, - "grad_norm": 3.5993690490722656, - "learning_rate": 2.662028442275618e-05, - "loss": 0.5428, - "step": 158680 - }, - { - "epoch": 1.4028713378949416, - "grad_norm": 0.7107141017913818, - "learning_rate": 2.6618811035084308e-05, - "loss": 0.5918, - "step": 158690 - }, - { - "epoch": 1.4029597411552537, - "grad_norm": 3.7796475887298584, - "learning_rate": 2.6617337647412436e-05, - "loss": 0.6606, - "step": 158700 - }, - { - "epoch": 1.403048144415566, - "grad_norm": 4.401938438415527, - "learning_rate": 2.6615864259740568e-05, - "loss": 0.6108, - "step": 158710 - }, - { - "epoch": 1.4031365476758784, - "grad_norm": 2.1124398708343506, - "learning_rate": 2.6614390872068697e-05, - "loss": 0.6109, - "step": 158720 - }, - { - "epoch": 1.4032249509361905, - "grad_norm": 1.367019772529602, - "learning_rate": 2.6612917484396825e-05, - "loss": 0.5916, - "step": 158730 - }, - { - "epoch": 1.4033133541965028, - "grad_norm": 2.4775075912475586, - "learning_rate": 2.6611444096724957e-05, - "loss": 0.6035, - "step": 158740 - }, - { - "epoch": 1.403401757456815, - "grad_norm": 1.916231393814087, - "learning_rate": 2.6609970709053085e-05, - "loss": 0.6342, - "step": 158750 - }, - { - "epoch": 1.4034901607171273, - "grad_norm": 4.126253604888916, - "learning_rate": 2.6608497321381213e-05, - "loss": 0.5982, - "step": 158760 - }, - { - "epoch": 1.4035785639774394, - "grad_norm": 2.7555999755859375, - "learning_rate": 2.660702393370934e-05, - "loss": 0.5341, - "step": 158770 - }, - { - "epoch": 1.4036669672377518, - "grad_norm": 1.5839358568191528, - "learning_rate": 2.6605550546037473e-05, - "loss": 0.5637, - "step": 158780 - }, - { - "epoch": 1.403755370498064, - "grad_norm": 2.5511538982391357, - "learning_rate": 2.6604077158365602e-05, - "loss": 0.5151, - "step": 158790 - }, - { - "epoch": 1.4038437737583762, - "grad_norm": 10.26308536529541, - "learning_rate": 2.660260377069373e-05, - "loss": 0.479, - "step": 158800 - }, - { - "epoch": 1.4039321770186883, - "grad_norm": 4.052664756774902, - "learning_rate": 2.6601130383021862e-05, - "loss": 0.5079, - "step": 158810 - }, - { - "epoch": 1.4040205802790007, - "grad_norm": 22.951419830322266, - "learning_rate": 2.659965699534999e-05, - "loss": 0.6827, - "step": 158820 - }, - { - "epoch": 1.404108983539313, - "grad_norm": 1.4759076833724976, - "learning_rate": 2.659818360767812e-05, - "loss": 0.5656, - "step": 158830 - }, - { - "epoch": 1.4041973867996251, - "grad_norm": 1.3229501247406006, - "learning_rate": 2.659671022000625e-05, - "loss": 0.5172, - "step": 158840 - }, - { - "epoch": 1.4042857900599375, - "grad_norm": 0.9039010405540466, - "learning_rate": 2.659523683233438e-05, - "loss": 0.6397, - "step": 158850 - }, - { - "epoch": 1.4043741933202496, - "grad_norm": 7.029833793640137, - "learning_rate": 2.6593763444662507e-05, - "loss": 0.7808, - "step": 158860 - }, - { - "epoch": 1.404462596580562, - "grad_norm": 7.425211429595947, - "learning_rate": 2.659229005699064e-05, - "loss": 0.4676, - "step": 158870 - }, - { - "epoch": 1.404550999840874, - "grad_norm": 3.775947093963623, - "learning_rate": 2.6590816669318764e-05, - "loss": 0.5577, - "step": 158880 - }, - { - "epoch": 1.4046394031011864, - "grad_norm": 6.46895694732666, - "learning_rate": 2.6589343281646895e-05, - "loss": 0.6114, - "step": 158890 - }, - { - "epoch": 1.4047278063614987, - "grad_norm": 1.6449109315872192, - "learning_rate": 2.6587869893975027e-05, - "loss": 0.6345, - "step": 158900 - }, - { - "epoch": 1.4048162096218109, - "grad_norm": 3.386824369430542, - "learning_rate": 2.6586396506303152e-05, - "loss": 0.6939, - "step": 158910 - }, - { - "epoch": 1.404904612882123, - "grad_norm": 2.3983635902404785, - "learning_rate": 2.6584923118631284e-05, - "loss": 0.5914, - "step": 158920 - }, - { - "epoch": 1.4049930161424353, - "grad_norm": 2.1223251819610596, - "learning_rate": 2.6583449730959416e-05, - "loss": 0.6003, - "step": 158930 - }, - { - "epoch": 1.4050814194027477, - "grad_norm": 11.794602394104004, - "learning_rate": 2.658197634328754e-05, - "loss": 0.6593, - "step": 158940 - }, - { - "epoch": 1.4051698226630598, - "grad_norm": 5.02594518661499, - "learning_rate": 2.6580502955615672e-05, - "loss": 0.6623, - "step": 158950 - }, - { - "epoch": 1.4052582259233721, - "grad_norm": 2.471235752105713, - "learning_rate": 2.6579029567943804e-05, - "loss": 0.5952, - "step": 158960 - }, - { - "epoch": 1.4053466291836842, - "grad_norm": 9.714351654052734, - "learning_rate": 2.657755618027193e-05, - "loss": 0.4271, - "step": 158970 - }, - { - "epoch": 1.4054350324439966, - "grad_norm": 1.7939386367797852, - "learning_rate": 2.657608279260006e-05, - "loss": 0.6081, - "step": 158980 - }, - { - "epoch": 1.4055234357043087, - "grad_norm": 3.073272228240967, - "learning_rate": 2.6574609404928186e-05, - "loss": 0.545, - "step": 158990 - }, - { - "epoch": 1.405611838964621, - "grad_norm": 4.8265228271484375, - "learning_rate": 2.6573136017256318e-05, - "loss": 0.8304, - "step": 159000 - }, - { - "epoch": 1.4057002422249334, - "grad_norm": 1.4160538911819458, - "learning_rate": 2.657166262958445e-05, - "loss": 0.5998, - "step": 159010 - }, - { - "epoch": 1.4057886454852455, - "grad_norm": 3.731612205505371, - "learning_rate": 2.6570189241912574e-05, - "loss": 0.712, - "step": 159020 - }, - { - "epoch": 1.4058770487455576, - "grad_norm": 8.12205696105957, - "learning_rate": 2.6568715854240706e-05, - "loss": 0.7205, - "step": 159030 - }, - { - "epoch": 1.40596545200587, - "grad_norm": 5.020598411560059, - "learning_rate": 2.6567242466568838e-05, - "loss": 0.5902, - "step": 159040 - }, - { - "epoch": 1.4060538552661823, - "grad_norm": 2.0906896591186523, - "learning_rate": 2.6565769078896963e-05, - "loss": 0.5751, - "step": 159050 - }, - { - "epoch": 1.4061422585264944, - "grad_norm": 6.701564311981201, - "learning_rate": 2.6564295691225094e-05, - "loss": 0.6301, - "step": 159060 - }, - { - "epoch": 1.4062306617868068, - "grad_norm": 3.2989742755889893, - "learning_rate": 2.6562822303553226e-05, - "loss": 0.5688, - "step": 159070 - }, - { - "epoch": 1.406319065047119, - "grad_norm": 1.159401774406433, - "learning_rate": 2.656134891588135e-05, - "loss": 0.4446, - "step": 159080 - }, - { - "epoch": 1.4064074683074312, - "grad_norm": 7.220983505249023, - "learning_rate": 2.6559875528209483e-05, - "loss": 0.5983, - "step": 159090 - }, - { - "epoch": 1.4064958715677434, - "grad_norm": 0.9707807302474976, - "learning_rate": 2.6558402140537608e-05, - "loss": 0.5463, - "step": 159100 - }, - { - "epoch": 1.4065842748280557, - "grad_norm": 1.8643614053726196, - "learning_rate": 2.655692875286574e-05, - "loss": 0.6093, - "step": 159110 - }, - { - "epoch": 1.4066726780883678, - "grad_norm": 3.016954183578491, - "learning_rate": 2.655545536519387e-05, - "loss": 0.682, - "step": 159120 - }, - { - "epoch": 1.4067610813486802, - "grad_norm": 1.7046053409576416, - "learning_rate": 2.6553981977521996e-05, - "loss": 0.5484, - "step": 159130 - }, - { - "epoch": 1.4068494846089923, - "grad_norm": 1.19966721534729, - "learning_rate": 2.6552508589850128e-05, - "loss": 0.5397, - "step": 159140 - }, - { - "epoch": 1.4069378878693046, - "grad_norm": 2.3112664222717285, - "learning_rate": 2.655103520217826e-05, - "loss": 0.6095, - "step": 159150 - }, - { - "epoch": 1.407026291129617, - "grad_norm": 2.450408935546875, - "learning_rate": 2.6549561814506385e-05, - "loss": 0.4302, - "step": 159160 - }, - { - "epoch": 1.407114694389929, - "grad_norm": 2.4362261295318604, - "learning_rate": 2.6548088426834516e-05, - "loss": 0.5715, - "step": 159170 - }, - { - "epoch": 1.4072030976502412, - "grad_norm": 4.261641979217529, - "learning_rate": 2.6546615039162648e-05, - "loss": 0.7641, - "step": 159180 - }, - { - "epoch": 1.4072915009105535, - "grad_norm": 3.0175554752349854, - "learning_rate": 2.6545141651490773e-05, - "loss": 0.5958, - "step": 159190 - }, - { - "epoch": 1.407379904170866, - "grad_norm": 7.465358257293701, - "learning_rate": 2.6543668263818905e-05, - "loss": 0.4886, - "step": 159200 - }, - { - "epoch": 1.407468307431178, - "grad_norm": 1.147223711013794, - "learning_rate": 2.6542194876147037e-05, - "loss": 0.6535, - "step": 159210 - }, - { - "epoch": 1.4075567106914904, - "grad_norm": 14.22011947631836, - "learning_rate": 2.654072148847516e-05, - "loss": 0.6468, - "step": 159220 - }, - { - "epoch": 1.4076451139518025, - "grad_norm": 1.5749155282974243, - "learning_rate": 2.6539248100803293e-05, - "loss": 0.6327, - "step": 159230 - }, - { - "epoch": 1.4077335172121148, - "grad_norm": 3.546191692352295, - "learning_rate": 2.653777471313142e-05, - "loss": 0.5151, - "step": 159240 - }, - { - "epoch": 1.407821920472427, - "grad_norm": 1.1169731616973877, - "learning_rate": 2.653630132545955e-05, - "loss": 0.5143, - "step": 159250 - }, - { - "epoch": 1.4079103237327393, - "grad_norm": 6.529577732086182, - "learning_rate": 2.6534827937787682e-05, - "loss": 0.6038, - "step": 159260 - }, - { - "epoch": 1.4079987269930516, - "grad_norm": 3.341996192932129, - "learning_rate": 2.6533354550115807e-05, - "loss": 0.6311, - "step": 159270 - }, - { - "epoch": 1.4080871302533637, - "grad_norm": 1.4324045181274414, - "learning_rate": 2.653188116244394e-05, - "loss": 0.6621, - "step": 159280 - }, - { - "epoch": 1.4081755335136759, - "grad_norm": 7.230814456939697, - "learning_rate": 2.653040777477207e-05, - "loss": 0.58, - "step": 159290 - }, - { - "epoch": 1.4082639367739882, - "grad_norm": 2.9738571643829346, - "learning_rate": 2.6528934387100195e-05, - "loss": 0.6256, - "step": 159300 - }, - { - "epoch": 1.4083523400343005, - "grad_norm": 4.846588134765625, - "learning_rate": 2.6527460999428327e-05, - "loss": 0.5342, - "step": 159310 - }, - { - "epoch": 1.4084407432946127, - "grad_norm": 2.3289072513580322, - "learning_rate": 2.652598761175646e-05, - "loss": 0.6672, - "step": 159320 - }, - { - "epoch": 1.408529146554925, - "grad_norm": 2.5556800365448, - "learning_rate": 2.6524514224084584e-05, - "loss": 0.5299, - "step": 159330 - }, - { - "epoch": 1.4086175498152371, - "grad_norm": 18.285154342651367, - "learning_rate": 2.6523040836412715e-05, - "loss": 0.5968, - "step": 159340 - }, - { - "epoch": 1.4087059530755495, - "grad_norm": 2.8540220260620117, - "learning_rate": 2.652156744874084e-05, - "loss": 0.68, - "step": 159350 - }, - { - "epoch": 1.4087943563358616, - "grad_norm": 2.933842658996582, - "learning_rate": 2.6520094061068972e-05, - "loss": 0.5594, - "step": 159360 - }, - { - "epoch": 1.408882759596174, - "grad_norm": 1.7848162651062012, - "learning_rate": 2.6518620673397104e-05, - "loss": 0.6775, - "step": 159370 - }, - { - "epoch": 1.4089711628564863, - "grad_norm": 5.437075138092041, - "learning_rate": 2.651714728572523e-05, - "loss": 0.5941, - "step": 159380 - }, - { - "epoch": 1.4090595661167984, - "grad_norm": 7.368473052978516, - "learning_rate": 2.651567389805336e-05, - "loss": 0.6777, - "step": 159390 - }, - { - "epoch": 1.4091479693771105, - "grad_norm": 1.5570006370544434, - "learning_rate": 2.6514200510381492e-05, - "loss": 0.5639, - "step": 159400 - }, - { - "epoch": 1.4092363726374229, - "grad_norm": 3.418001174926758, - "learning_rate": 2.6512727122709617e-05, - "loss": 0.6875, - "step": 159410 - }, - { - "epoch": 1.4093247758977352, - "grad_norm": 2.95265531539917, - "learning_rate": 2.651125373503775e-05, - "loss": 0.5776, - "step": 159420 - }, - { - "epoch": 1.4094131791580473, - "grad_norm": 1.4178526401519775, - "learning_rate": 2.650978034736588e-05, - "loss": 0.6171, - "step": 159430 - }, - { - "epoch": 1.4095015824183597, - "grad_norm": 3.679302930831909, - "learning_rate": 2.6508306959694006e-05, - "loss": 0.6201, - "step": 159440 - }, - { - "epoch": 1.4095899856786718, - "grad_norm": 2.993734359741211, - "learning_rate": 2.6506833572022137e-05, - "loss": 0.6115, - "step": 159450 - }, - { - "epoch": 1.4096783889389841, - "grad_norm": 3.4420623779296875, - "learning_rate": 2.6505360184350262e-05, - "loss": 0.6573, - "step": 159460 - }, - { - "epoch": 1.4097667921992962, - "grad_norm": 2.0916452407836914, - "learning_rate": 2.6503886796678394e-05, - "loss": 0.6566, - "step": 159470 - }, - { - "epoch": 1.4098551954596086, - "grad_norm": 1.305911660194397, - "learning_rate": 2.6502413409006526e-05, - "loss": 0.6405, - "step": 159480 - }, - { - "epoch": 1.409943598719921, - "grad_norm": 4.582038879394531, - "learning_rate": 2.650094002133465e-05, - "loss": 0.5164, - "step": 159490 - }, - { - "epoch": 1.410032001980233, - "grad_norm": 1.4076347351074219, - "learning_rate": 2.6499466633662783e-05, - "loss": 0.6497, - "step": 159500 - }, - { - "epoch": 1.4101204052405452, - "grad_norm": 1.8839563131332397, - "learning_rate": 2.6497993245990914e-05, - "loss": 0.6284, - "step": 159510 - }, - { - "epoch": 1.4102088085008575, - "grad_norm": 2.5630011558532715, - "learning_rate": 2.649651985831904e-05, - "loss": 0.6347, - "step": 159520 - }, - { - "epoch": 1.4102972117611698, - "grad_norm": 0.8112945556640625, - "learning_rate": 2.649504647064717e-05, - "loss": 0.6317, - "step": 159530 - }, - { - "epoch": 1.410385615021482, - "grad_norm": 2.7287089824676514, - "learning_rate": 2.6493573082975303e-05, - "loss": 0.6246, - "step": 159540 - }, - { - "epoch": 1.4104740182817943, - "grad_norm": 2.543942928314209, - "learning_rate": 2.6492099695303428e-05, - "loss": 0.6773, - "step": 159550 - }, - { - "epoch": 1.4105624215421064, - "grad_norm": 2.2022199630737305, - "learning_rate": 2.649062630763156e-05, - "loss": 0.5178, - "step": 159560 - }, - { - "epoch": 1.4106508248024188, - "grad_norm": 0.7935627102851868, - "learning_rate": 2.6489152919959688e-05, - "loss": 0.6608, - "step": 159570 - }, - { - "epoch": 1.4107392280627309, - "grad_norm": 1.3393452167510986, - "learning_rate": 2.648767953228782e-05, - "loss": 0.7416, - "step": 159580 - }, - { - "epoch": 1.4108276313230432, - "grad_norm": 2.2884154319763184, - "learning_rate": 2.6486206144615948e-05, - "loss": 0.7118, - "step": 159590 - }, - { - "epoch": 1.4109160345833556, - "grad_norm": 2.903428792953491, - "learning_rate": 2.6484732756944076e-05, - "loss": 0.5632, - "step": 159600 - }, - { - "epoch": 1.4110044378436677, - "grad_norm": 2.1919758319854736, - "learning_rate": 2.6483259369272208e-05, - "loss": 0.5821, - "step": 159610 - }, - { - "epoch": 1.4110928411039798, - "grad_norm": 4.861763000488281, - "learning_rate": 2.6481785981600336e-05, - "loss": 0.5739, - "step": 159620 - }, - { - "epoch": 1.4111812443642922, - "grad_norm": 3.7185757160186768, - "learning_rate": 2.6480312593928465e-05, - "loss": 0.5976, - "step": 159630 - }, - { - "epoch": 1.4112696476246045, - "grad_norm": 3.7825937271118164, - "learning_rate": 2.6478839206256597e-05, - "loss": 0.673, - "step": 159640 - }, - { - "epoch": 1.4113580508849166, - "grad_norm": 2.2832963466644287, - "learning_rate": 2.6477365818584725e-05, - "loss": 0.5858, - "step": 159650 - }, - { - "epoch": 1.411446454145229, - "grad_norm": 2.159510612487793, - "learning_rate": 2.6475892430912853e-05, - "loss": 0.5939, - "step": 159660 - }, - { - "epoch": 1.411534857405541, - "grad_norm": 7.305959701538086, - "learning_rate": 2.6474419043240985e-05, - "loss": 0.5744, - "step": 159670 - }, - { - "epoch": 1.4116232606658534, - "grad_norm": 1.4871844053268433, - "learning_rate": 2.6472945655569113e-05, - "loss": 0.6274, - "step": 159680 - }, - { - "epoch": 1.4117116639261655, - "grad_norm": 2.290940761566162, - "learning_rate": 2.6471472267897242e-05, - "loss": 0.5052, - "step": 159690 - }, - { - "epoch": 1.4118000671864779, - "grad_norm": 7.085480213165283, - "learning_rate": 2.6469998880225373e-05, - "loss": 0.5243, - "step": 159700 - }, - { - "epoch": 1.41188847044679, - "grad_norm": 4.206778526306152, - "learning_rate": 2.64685254925535e-05, - "loss": 0.5879, - "step": 159710 - }, - { - "epoch": 1.4119768737071023, - "grad_norm": 1.0725706815719604, - "learning_rate": 2.646705210488163e-05, - "loss": 0.5319, - "step": 159720 - }, - { - "epoch": 1.4120652769674145, - "grad_norm": 0.9569700956344604, - "learning_rate": 2.6465578717209762e-05, - "loss": 0.7327, - "step": 159730 - }, - { - "epoch": 1.4121536802277268, - "grad_norm": 6.424351692199707, - "learning_rate": 2.6464105329537887e-05, - "loss": 0.7711, - "step": 159740 - }, - { - "epoch": 1.4122420834880391, - "grad_norm": 17.041942596435547, - "learning_rate": 2.646263194186602e-05, - "loss": 0.5717, - "step": 159750 - }, - { - "epoch": 1.4123304867483513, - "grad_norm": 3.166179895401001, - "learning_rate": 2.646115855419415e-05, - "loss": 0.6865, - "step": 159760 - }, - { - "epoch": 1.4124188900086634, - "grad_norm": 1.822239637374878, - "learning_rate": 2.6459685166522275e-05, - "loss": 0.6063, - "step": 159770 - }, - { - "epoch": 1.4125072932689757, - "grad_norm": 14.279197692871094, - "learning_rate": 2.6458211778850407e-05, - "loss": 0.5028, - "step": 159780 - }, - { - "epoch": 1.412595696529288, - "grad_norm": 3.0916919708251953, - "learning_rate": 2.645673839117854e-05, - "loss": 0.6056, - "step": 159790 - }, - { - "epoch": 1.4126840997896002, - "grad_norm": 6.698751449584961, - "learning_rate": 2.6455265003506664e-05, - "loss": 0.6232, - "step": 159800 - }, - { - "epoch": 1.4127725030499125, - "grad_norm": 0.8570348024368286, - "learning_rate": 2.6453791615834796e-05, - "loss": 0.7168, - "step": 159810 - }, - { - "epoch": 1.4128609063102247, - "grad_norm": 1.9548002481460571, - "learning_rate": 2.645231822816292e-05, - "loss": 0.6415, - "step": 159820 - }, - { - "epoch": 1.412949309570537, - "grad_norm": 4.140524864196777, - "learning_rate": 2.6450844840491052e-05, - "loss": 0.5397, - "step": 159830 - }, - { - "epoch": 1.4130377128308491, - "grad_norm": 2.5898735523223877, - "learning_rate": 2.6449371452819184e-05, - "loss": 0.5723, - "step": 159840 - }, - { - "epoch": 1.4131261160911615, - "grad_norm": 7.782649040222168, - "learning_rate": 2.644789806514731e-05, - "loss": 0.7294, - "step": 159850 - }, - { - "epoch": 1.4132145193514738, - "grad_norm": 1.7335716485977173, - "learning_rate": 2.644642467747544e-05, - "loss": 0.5761, - "step": 159860 - }, - { - "epoch": 1.413302922611786, - "grad_norm": 1.9642634391784668, - "learning_rate": 2.6444951289803572e-05, - "loss": 0.5269, - "step": 159870 - }, - { - "epoch": 1.413391325872098, - "grad_norm": 1.5840822458267212, - "learning_rate": 2.6443477902131697e-05, - "loss": 0.7299, - "step": 159880 - }, - { - "epoch": 1.4134797291324104, - "grad_norm": 2.5457520484924316, - "learning_rate": 2.644200451445983e-05, - "loss": 0.7514, - "step": 159890 - }, - { - "epoch": 1.4135681323927227, - "grad_norm": 2.503714084625244, - "learning_rate": 2.644053112678796e-05, - "loss": 0.699, - "step": 159900 - }, - { - "epoch": 1.4136565356530348, - "grad_norm": 6.777674674987793, - "learning_rate": 2.6439057739116086e-05, - "loss": 0.7319, - "step": 159910 - }, - { - "epoch": 1.4137449389133472, - "grad_norm": 3.254171133041382, - "learning_rate": 2.6437584351444218e-05, - "loss": 0.6473, - "step": 159920 - }, - { - "epoch": 1.4138333421736593, - "grad_norm": 28.20862579345703, - "learning_rate": 2.6436110963772343e-05, - "loss": 0.657, - "step": 159930 - }, - { - "epoch": 1.4139217454339716, - "grad_norm": 2.420679807662964, - "learning_rate": 2.6434637576100474e-05, - "loss": 0.6593, - "step": 159940 - }, - { - "epoch": 1.4140101486942838, - "grad_norm": 3.686713933944702, - "learning_rate": 2.6433164188428606e-05, - "loss": 0.5086, - "step": 159950 - }, - { - "epoch": 1.414098551954596, - "grad_norm": 2.3647212982177734, - "learning_rate": 2.643169080075673e-05, - "loss": 0.5481, - "step": 159960 - }, - { - "epoch": 1.4141869552149084, - "grad_norm": 2.3430593013763428, - "learning_rate": 2.6430217413084863e-05, - "loss": 0.5618, - "step": 159970 - }, - { - "epoch": 1.4142753584752206, - "grad_norm": 7.406024932861328, - "learning_rate": 2.6428744025412994e-05, - "loss": 0.6182, - "step": 159980 - }, - { - "epoch": 1.4143637617355327, - "grad_norm": 1.0082461833953857, - "learning_rate": 2.642727063774112e-05, - "loss": 0.535, - "step": 159990 - }, - { - "epoch": 1.414452164995845, - "grad_norm": 7.226438522338867, - "learning_rate": 2.642579725006925e-05, - "loss": 0.5938, - "step": 160000 - }, - { - "epoch": 1.4145405682561574, - "grad_norm": 2.860383987426758, - "learning_rate": 2.6424323862397383e-05, - "loss": 0.7929, - "step": 160010 - }, - { - "epoch": 1.4146289715164695, - "grad_norm": 9.406736373901367, - "learning_rate": 2.6422850474725508e-05, - "loss": 0.5238, - "step": 160020 - }, - { - "epoch": 1.4147173747767818, - "grad_norm": 2.0454769134521484, - "learning_rate": 2.642137708705364e-05, - "loss": 0.5808, - "step": 160030 - }, - { - "epoch": 1.414805778037094, - "grad_norm": 3.2237532138824463, - "learning_rate": 2.641990369938177e-05, - "loss": 0.5386, - "step": 160040 - }, - { - "epoch": 1.4148941812974063, - "grad_norm": 3.441462278366089, - "learning_rate": 2.6418430311709896e-05, - "loss": 0.5946, - "step": 160050 - }, - { - "epoch": 1.4149825845577184, - "grad_norm": 1.8770806789398193, - "learning_rate": 2.6416956924038028e-05, - "loss": 0.5488, - "step": 160060 - }, - { - "epoch": 1.4150709878180308, - "grad_norm": 4.879007816314697, - "learning_rate": 2.6415483536366153e-05, - "loss": 0.6087, - "step": 160070 - }, - { - "epoch": 1.415159391078343, - "grad_norm": 0.9643071293830872, - "learning_rate": 2.6414010148694285e-05, - "loss": 0.6084, - "step": 160080 - }, - { - "epoch": 1.4152477943386552, - "grad_norm": 8.081360816955566, - "learning_rate": 2.6412536761022417e-05, - "loss": 0.6593, - "step": 160090 - }, - { - "epoch": 1.4153361975989673, - "grad_norm": 2.666673183441162, - "learning_rate": 2.641106337335054e-05, - "loss": 0.7004, - "step": 160100 - }, - { - "epoch": 1.4154246008592797, - "grad_norm": 3.298182725906372, - "learning_rate": 2.6409589985678673e-05, - "loss": 0.5644, - "step": 160110 - }, - { - "epoch": 1.415513004119592, - "grad_norm": 10.879555702209473, - "learning_rate": 2.6408116598006805e-05, - "loss": 0.5782, - "step": 160120 - }, - { - "epoch": 1.4156014073799041, - "grad_norm": 4.905949115753174, - "learning_rate": 2.640664321033493e-05, - "loss": 0.5191, - "step": 160130 - }, - { - "epoch": 1.4156898106402165, - "grad_norm": 17.285911560058594, - "learning_rate": 2.640516982266306e-05, - "loss": 0.5591, - "step": 160140 - }, - { - "epoch": 1.4157782139005286, - "grad_norm": 3.5555167198181152, - "learning_rate": 2.6403696434991193e-05, - "loss": 0.6025, - "step": 160150 - }, - { - "epoch": 1.415866617160841, - "grad_norm": 6.737716197967529, - "learning_rate": 2.640222304731932e-05, - "loss": 0.5683, - "step": 160160 - }, - { - "epoch": 1.415955020421153, - "grad_norm": 4.676901817321777, - "learning_rate": 2.640074965964745e-05, - "loss": 0.6096, - "step": 160170 - }, - { - "epoch": 1.4160434236814654, - "grad_norm": 1.5498571395874023, - "learning_rate": 2.6399276271975575e-05, - "loss": 0.4815, - "step": 160180 - }, - { - "epoch": 1.4161318269417777, - "grad_norm": 1.014817476272583, - "learning_rate": 2.6397802884303707e-05, - "loss": 0.5848, - "step": 160190 - }, - { - "epoch": 1.4162202302020899, - "grad_norm": 5.023947238922119, - "learning_rate": 2.639632949663184e-05, - "loss": 0.7855, - "step": 160200 - }, - { - "epoch": 1.416308633462402, - "grad_norm": 6.053834915161133, - "learning_rate": 2.6394856108959964e-05, - "loss": 0.5075, - "step": 160210 - }, - { - "epoch": 1.4163970367227143, - "grad_norm": 1.7487704753875732, - "learning_rate": 2.6393382721288095e-05, - "loss": 0.6308, - "step": 160220 - }, - { - "epoch": 1.4164854399830267, - "grad_norm": 1.0961353778839111, - "learning_rate": 2.6391909333616227e-05, - "loss": 0.6471, - "step": 160230 - }, - { - "epoch": 1.4165738432433388, - "grad_norm": 1.803863525390625, - "learning_rate": 2.6390435945944352e-05, - "loss": 0.5399, - "step": 160240 - }, - { - "epoch": 1.4166622465036511, - "grad_norm": 2.776106357574463, - "learning_rate": 2.6388962558272484e-05, - "loss": 0.5949, - "step": 160250 - }, - { - "epoch": 1.4167506497639633, - "grad_norm": 2.740386962890625, - "learning_rate": 2.6387489170600615e-05, - "loss": 0.6215, - "step": 160260 - }, - { - "epoch": 1.4168390530242756, - "grad_norm": 1.117795705795288, - "learning_rate": 2.638601578292874e-05, - "loss": 0.6517, - "step": 160270 - }, - { - "epoch": 1.4169274562845877, - "grad_norm": 2.1224026679992676, - "learning_rate": 2.6384542395256872e-05, - "loss": 0.6073, - "step": 160280 - }, - { - "epoch": 1.4170158595449, - "grad_norm": 2.0323450565338135, - "learning_rate": 2.6383069007584997e-05, - "loss": 0.6173, - "step": 160290 - }, - { - "epoch": 1.4171042628052122, - "grad_norm": 5.175277233123779, - "learning_rate": 2.638159561991313e-05, - "loss": 0.5712, - "step": 160300 - }, - { - "epoch": 1.4171926660655245, - "grad_norm": 2.6836841106414795, - "learning_rate": 2.638012223224126e-05, - "loss": 0.6467, - "step": 160310 - }, - { - "epoch": 1.4172810693258366, - "grad_norm": 2.9890382289886475, - "learning_rate": 2.6378648844569386e-05, - "loss": 0.6943, - "step": 160320 - }, - { - "epoch": 1.417369472586149, - "grad_norm": 4.35649299621582, - "learning_rate": 2.6377175456897517e-05, - "loss": 0.6489, - "step": 160330 - }, - { - "epoch": 1.4174578758464613, - "grad_norm": 1.9911431074142456, - "learning_rate": 2.637570206922565e-05, - "loss": 0.6182, - "step": 160340 - }, - { - "epoch": 1.4175462791067734, - "grad_norm": 1.7357922792434692, - "learning_rate": 2.6374228681553774e-05, - "loss": 0.5361, - "step": 160350 - }, - { - "epoch": 1.4176346823670856, - "grad_norm": 2.3180503845214844, - "learning_rate": 2.6372755293881906e-05, - "loss": 0.625, - "step": 160360 - }, - { - "epoch": 1.417723085627398, - "grad_norm": 1.7044185400009155, - "learning_rate": 2.6371281906210038e-05, - "loss": 0.5539, - "step": 160370 - }, - { - "epoch": 1.4178114888877102, - "grad_norm": 1.8271578550338745, - "learning_rate": 2.6369808518538163e-05, - "loss": 0.6551, - "step": 160380 - }, - { - "epoch": 1.4178998921480224, - "grad_norm": 2.401309013366699, - "learning_rate": 2.6368335130866294e-05, - "loss": 0.6728, - "step": 160390 - }, - { - "epoch": 1.4179882954083347, - "grad_norm": 1.3315385580062866, - "learning_rate": 2.6366861743194423e-05, - "loss": 0.5476, - "step": 160400 - }, - { - "epoch": 1.4180766986686468, - "grad_norm": 1.1570377349853516, - "learning_rate": 2.636538835552255e-05, - "loss": 0.6512, - "step": 160410 - }, - { - "epoch": 1.4181651019289592, - "grad_norm": 1.9309003353118896, - "learning_rate": 2.6363914967850683e-05, - "loss": 0.5329, - "step": 160420 - }, - { - "epoch": 1.4182535051892713, - "grad_norm": 3.001166820526123, - "learning_rate": 2.636244158017881e-05, - "loss": 0.5772, - "step": 160430 - }, - { - "epoch": 1.4183419084495836, - "grad_norm": 1.5392869710922241, - "learning_rate": 2.636096819250694e-05, - "loss": 0.6, - "step": 160440 - }, - { - "epoch": 1.418430311709896, - "grad_norm": 2.6735455989837646, - "learning_rate": 2.635949480483507e-05, - "loss": 0.4952, - "step": 160450 - }, - { - "epoch": 1.418518714970208, - "grad_norm": 1.7346298694610596, - "learning_rate": 2.63580214171632e-05, - "loss": 0.5309, - "step": 160460 - }, - { - "epoch": 1.4186071182305202, - "grad_norm": 4.668123722076416, - "learning_rate": 2.6356548029491328e-05, - "loss": 0.6442, - "step": 160470 - }, - { - "epoch": 1.4186955214908326, - "grad_norm": 8.75904369354248, - "learning_rate": 2.635507464181946e-05, - "loss": 0.4956, - "step": 160480 - }, - { - "epoch": 1.418783924751145, - "grad_norm": 8.470442771911621, - "learning_rate": 2.6353601254147588e-05, - "loss": 0.5853, - "step": 160490 - }, - { - "epoch": 1.418872328011457, - "grad_norm": 2.0491862297058105, - "learning_rate": 2.6352127866475716e-05, - "loss": 0.4914, - "step": 160500 - }, - { - "epoch": 1.4189607312717694, - "grad_norm": 2.6826112270355225, - "learning_rate": 2.6350654478803848e-05, - "loss": 0.4903, - "step": 160510 - }, - { - "epoch": 1.4190491345320815, - "grad_norm": 2.543534755706787, - "learning_rate": 2.6349181091131976e-05, - "loss": 0.7342, - "step": 160520 - }, - { - "epoch": 1.4191375377923938, - "grad_norm": 4.157847881317139, - "learning_rate": 2.6347707703460105e-05, - "loss": 0.8395, - "step": 160530 - }, - { - "epoch": 1.419225941052706, - "grad_norm": 2.462852954864502, - "learning_rate": 2.6346234315788233e-05, - "loss": 0.6625, - "step": 160540 - }, - { - "epoch": 1.4193143443130183, - "grad_norm": 2.1189374923706055, - "learning_rate": 2.6344760928116365e-05, - "loss": 0.5799, - "step": 160550 - }, - { - "epoch": 1.4194027475733306, - "grad_norm": 5.7805986404418945, - "learning_rate": 2.6343287540444493e-05, - "loss": 0.61, - "step": 160560 - }, - { - "epoch": 1.4194911508336427, - "grad_norm": 10.9738187789917, - "learning_rate": 2.634181415277262e-05, - "loss": 0.4662, - "step": 160570 - }, - { - "epoch": 1.4195795540939549, - "grad_norm": 1.3473695516586304, - "learning_rate": 2.6340340765100753e-05, - "loss": 0.6168, - "step": 160580 - }, - { - "epoch": 1.4196679573542672, - "grad_norm": 6.541378021240234, - "learning_rate": 2.633886737742888e-05, - "loss": 0.6607, - "step": 160590 - }, - { - "epoch": 1.4197563606145795, - "grad_norm": 1.9721786975860596, - "learning_rate": 2.633739398975701e-05, - "loss": 0.5672, - "step": 160600 - }, - { - "epoch": 1.4198447638748917, - "grad_norm": 4.219167709350586, - "learning_rate": 2.6335920602085142e-05, - "loss": 0.6612, - "step": 160610 - }, - { - "epoch": 1.419933167135204, - "grad_norm": 5.972991466522217, - "learning_rate": 2.633444721441327e-05, - "loss": 0.6709, - "step": 160620 - }, - { - "epoch": 1.4200215703955161, - "grad_norm": 4.19579553604126, - "learning_rate": 2.63329738267414e-05, - "loss": 0.6408, - "step": 160630 - }, - { - "epoch": 1.4201099736558285, - "grad_norm": 2.4083566665649414, - "learning_rate": 2.633150043906953e-05, - "loss": 0.5156, - "step": 160640 - }, - { - "epoch": 1.4201983769161406, - "grad_norm": 1.0021512508392334, - "learning_rate": 2.6330027051397655e-05, - "loss": 0.6418, - "step": 160650 - }, - { - "epoch": 1.420286780176453, - "grad_norm": 11.212654113769531, - "learning_rate": 2.6328553663725787e-05, - "loss": 0.6667, - "step": 160660 - }, - { - "epoch": 1.4203751834367653, - "grad_norm": 2.1402647495269775, - "learning_rate": 2.632708027605392e-05, - "loss": 0.6329, - "step": 160670 - }, - { - "epoch": 1.4204635866970774, - "grad_norm": 1.101129174232483, - "learning_rate": 2.6325606888382044e-05, - "loss": 0.6209, - "step": 160680 - }, - { - "epoch": 1.4205519899573895, - "grad_norm": 2.521413564682007, - "learning_rate": 2.6324133500710175e-05, - "loss": 0.7676, - "step": 160690 - }, - { - "epoch": 1.4206403932177019, - "grad_norm": 1.835071325302124, - "learning_rate": 2.6322660113038307e-05, - "loss": 0.5728, - "step": 160700 - }, - { - "epoch": 1.4207287964780142, - "grad_norm": 7.986724376678467, - "learning_rate": 2.6321186725366432e-05, - "loss": 0.6278, - "step": 160710 - }, - { - "epoch": 1.4208171997383263, - "grad_norm": 2.8970935344696045, - "learning_rate": 2.6319713337694564e-05, - "loss": 0.7514, - "step": 160720 - }, - { - "epoch": 1.4209056029986387, - "grad_norm": 3.4715514183044434, - "learning_rate": 2.6318239950022696e-05, - "loss": 0.6582, - "step": 160730 - }, - { - "epoch": 1.4209940062589508, - "grad_norm": 1.9553172588348389, - "learning_rate": 2.631676656235082e-05, - "loss": 0.5588, - "step": 160740 - }, - { - "epoch": 1.4210824095192631, - "grad_norm": 19.131589889526367, - "learning_rate": 2.6315293174678952e-05, - "loss": 0.6642, - "step": 160750 - }, - { - "epoch": 1.4211708127795752, - "grad_norm": 8.412508010864258, - "learning_rate": 2.6313819787007077e-05, - "loss": 0.6862, - "step": 160760 - }, - { - "epoch": 1.4212592160398876, - "grad_norm": 1.5035523176193237, - "learning_rate": 2.631234639933521e-05, - "loss": 0.594, - "step": 160770 - }, - { - "epoch": 1.4213476193002, - "grad_norm": 0.656141996383667, - "learning_rate": 2.631087301166334e-05, - "loss": 0.5416, - "step": 160780 - }, - { - "epoch": 1.421436022560512, - "grad_norm": 1.2475924491882324, - "learning_rate": 2.6309399623991466e-05, - "loss": 0.6128, - "step": 160790 - }, - { - "epoch": 1.4215244258208242, - "grad_norm": 1.5268205404281616, - "learning_rate": 2.6307926236319597e-05, - "loss": 0.6367, - "step": 160800 - }, - { - "epoch": 1.4216128290811365, - "grad_norm": 8.67261028289795, - "learning_rate": 2.630645284864773e-05, - "loss": 0.6368, - "step": 160810 - }, - { - "epoch": 1.4217012323414489, - "grad_norm": 1.3337799310684204, - "learning_rate": 2.6304979460975854e-05, - "loss": 0.511, - "step": 160820 - }, - { - "epoch": 1.421789635601761, - "grad_norm": 2.545447587966919, - "learning_rate": 2.6303506073303986e-05, - "loss": 0.6217, - "step": 160830 - }, - { - "epoch": 1.4218780388620733, - "grad_norm": 4.208387851715088, - "learning_rate": 2.6302032685632118e-05, - "loss": 0.6286, - "step": 160840 - }, - { - "epoch": 1.4219664421223854, - "grad_norm": 2.237874984741211, - "learning_rate": 2.6300559297960243e-05, - "loss": 0.5451, - "step": 160850 - }, - { - "epoch": 1.4220548453826978, - "grad_norm": 1.8355919122695923, - "learning_rate": 2.6299085910288374e-05, - "loss": 0.7452, - "step": 160860 - }, - { - "epoch": 1.42214324864301, - "grad_norm": 6.9248857498168945, - "learning_rate": 2.62976125226165e-05, - "loss": 0.7023, - "step": 160870 - }, - { - "epoch": 1.4222316519033222, - "grad_norm": 2.60052752494812, - "learning_rate": 2.629613913494463e-05, - "loss": 0.6069, - "step": 160880 - }, - { - "epoch": 1.4223200551636344, - "grad_norm": 1.3317762613296509, - "learning_rate": 2.6294665747272763e-05, - "loss": 0.5568, - "step": 160890 - }, - { - "epoch": 1.4224084584239467, - "grad_norm": 2.9765264987945557, - "learning_rate": 2.6293192359600888e-05, - "loss": 0.5409, - "step": 160900 - }, - { - "epoch": 1.4224968616842588, - "grad_norm": 2.9700498580932617, - "learning_rate": 2.629171897192902e-05, - "loss": 0.677, - "step": 160910 - }, - { - "epoch": 1.4225852649445712, - "grad_norm": 1.7976515293121338, - "learning_rate": 2.629024558425715e-05, - "loss": 0.5045, - "step": 160920 - }, - { - "epoch": 1.4226736682048835, - "grad_norm": 1.568293571472168, - "learning_rate": 2.6288772196585276e-05, - "loss": 0.6589, - "step": 160930 - }, - { - "epoch": 1.4227620714651956, - "grad_norm": 21.234567642211914, - "learning_rate": 2.6287298808913408e-05, - "loss": 0.5197, - "step": 160940 - }, - { - "epoch": 1.4228504747255077, - "grad_norm": 4.011499881744385, - "learning_rate": 2.628582542124154e-05, - "loss": 0.5433, - "step": 160950 - }, - { - "epoch": 1.42293887798582, - "grad_norm": 4.501284122467041, - "learning_rate": 2.6284352033569665e-05, - "loss": 0.4991, - "step": 160960 - }, - { - "epoch": 1.4230272812461324, - "grad_norm": 4.294429302215576, - "learning_rate": 2.6282878645897796e-05, - "loss": 0.613, - "step": 160970 - }, - { - "epoch": 1.4231156845064445, - "grad_norm": 2.0576703548431396, - "learning_rate": 2.6281405258225928e-05, - "loss": 0.6648, - "step": 160980 - }, - { - "epoch": 1.4232040877667569, - "grad_norm": 1.1466312408447266, - "learning_rate": 2.6279931870554053e-05, - "loss": 0.6383, - "step": 160990 - }, - { - "epoch": 1.423292491027069, - "grad_norm": 2.554060935974121, - "learning_rate": 2.6278458482882185e-05, - "loss": 0.7308, - "step": 161000 - }, - { - "epoch": 1.4233808942873813, - "grad_norm": 2.549853801727295, - "learning_rate": 2.627698509521031e-05, - "loss": 0.628, - "step": 161010 - }, - { - "epoch": 1.4234692975476935, - "grad_norm": 2.7782649993896484, - "learning_rate": 2.627551170753844e-05, - "loss": 0.6412, - "step": 161020 - }, - { - "epoch": 1.4235577008080058, - "grad_norm": 0.8129737973213196, - "learning_rate": 2.6274038319866573e-05, - "loss": 0.6006, - "step": 161030 - }, - { - "epoch": 1.4236461040683182, - "grad_norm": 7.073207378387451, - "learning_rate": 2.6272564932194698e-05, - "loss": 0.6677, - "step": 161040 - }, - { - "epoch": 1.4237345073286303, - "grad_norm": 1.9187803268432617, - "learning_rate": 2.627109154452283e-05, - "loss": 0.6267, - "step": 161050 - }, - { - "epoch": 1.4238229105889424, - "grad_norm": 0.6884849071502686, - "learning_rate": 2.6269618156850962e-05, - "loss": 0.6123, - "step": 161060 - }, - { - "epoch": 1.4239113138492547, - "grad_norm": 1.2558132410049438, - "learning_rate": 2.6268144769179087e-05, - "loss": 0.6612, - "step": 161070 - }, - { - "epoch": 1.423999717109567, - "grad_norm": 6.449822425842285, - "learning_rate": 2.626667138150722e-05, - "loss": 0.6341, - "step": 161080 - }, - { - "epoch": 1.4240881203698792, - "grad_norm": 1.1429452896118164, - "learning_rate": 2.626519799383535e-05, - "loss": 0.5647, - "step": 161090 - }, - { - "epoch": 1.4241765236301915, - "grad_norm": 3.5178356170654297, - "learning_rate": 2.6263724606163475e-05, - "loss": 0.6495, - "step": 161100 - }, - { - "epoch": 1.4242649268905037, - "grad_norm": 2.5806288719177246, - "learning_rate": 2.6262251218491607e-05, - "loss": 0.5313, - "step": 161110 - }, - { - "epoch": 1.424353330150816, - "grad_norm": 11.872706413269043, - "learning_rate": 2.6260777830819732e-05, - "loss": 0.685, - "step": 161120 - }, - { - "epoch": 1.4244417334111281, - "grad_norm": 1.9377710819244385, - "learning_rate": 2.6259304443147864e-05, - "loss": 0.713, - "step": 161130 - }, - { - "epoch": 1.4245301366714405, - "grad_norm": 4.7357683181762695, - "learning_rate": 2.6257831055475995e-05, - "loss": 0.6366, - "step": 161140 - }, - { - "epoch": 1.4246185399317528, - "grad_norm": 1.9626750946044922, - "learning_rate": 2.625635766780412e-05, - "loss": 0.5811, - "step": 161150 - }, - { - "epoch": 1.424706943192065, - "grad_norm": 4.0224223136901855, - "learning_rate": 2.6254884280132252e-05, - "loss": 0.4357, - "step": 161160 - }, - { - "epoch": 1.424795346452377, - "grad_norm": 4.830257892608643, - "learning_rate": 2.6253410892460384e-05, - "loss": 0.6453, - "step": 161170 - }, - { - "epoch": 1.4248837497126894, - "grad_norm": 4.101064205169678, - "learning_rate": 2.625193750478851e-05, - "loss": 0.681, - "step": 161180 - }, - { - "epoch": 1.4249721529730017, - "grad_norm": 1.207502841949463, - "learning_rate": 2.625046411711664e-05, - "loss": 0.4554, - "step": 161190 - }, - { - "epoch": 1.4250605562333138, - "grad_norm": 6.373531341552734, - "learning_rate": 2.6248990729444772e-05, - "loss": 0.7484, - "step": 161200 - }, - { - "epoch": 1.4251489594936262, - "grad_norm": 3.479339122772217, - "learning_rate": 2.6247517341772897e-05, - "loss": 0.6728, - "step": 161210 - }, - { - "epoch": 1.4252373627539383, - "grad_norm": 1.463140845298767, - "learning_rate": 2.624604395410103e-05, - "loss": 0.5925, - "step": 161220 - }, - { - "epoch": 1.4253257660142507, - "grad_norm": 8.887065887451172, - "learning_rate": 2.6244570566429154e-05, - "loss": 0.5616, - "step": 161230 - }, - { - "epoch": 1.4254141692745628, - "grad_norm": 1.3086721897125244, - "learning_rate": 2.6243097178757286e-05, - "loss": 0.5419, - "step": 161240 - }, - { - "epoch": 1.4255025725348751, - "grad_norm": 1.8700672388076782, - "learning_rate": 2.6241623791085417e-05, - "loss": 0.5145, - "step": 161250 - }, - { - "epoch": 1.4255909757951875, - "grad_norm": 2.630340337753296, - "learning_rate": 2.6240150403413542e-05, - "loss": 0.635, - "step": 161260 - }, - { - "epoch": 1.4256793790554996, - "grad_norm": 22.789064407348633, - "learning_rate": 2.6238677015741674e-05, - "loss": 0.5904, - "step": 161270 - }, - { - "epoch": 1.4257677823158117, - "grad_norm": 7.103511333465576, - "learning_rate": 2.6237203628069806e-05, - "loss": 0.5613, - "step": 161280 - }, - { - "epoch": 1.425856185576124, - "grad_norm": 1.8684812784194946, - "learning_rate": 2.623573024039793e-05, - "loss": 0.6027, - "step": 161290 - }, - { - "epoch": 1.4259445888364364, - "grad_norm": 1.2190165519714355, - "learning_rate": 2.6234256852726063e-05, - "loss": 0.5527, - "step": 161300 - }, - { - "epoch": 1.4260329920967485, - "grad_norm": 4.788529872894287, - "learning_rate": 2.6232783465054194e-05, - "loss": 0.5814, - "step": 161310 - }, - { - "epoch": 1.4261213953570608, - "grad_norm": 1.8191792964935303, - "learning_rate": 2.623131007738232e-05, - "loss": 0.6695, - "step": 161320 - }, - { - "epoch": 1.426209798617373, - "grad_norm": 0.710638165473938, - "learning_rate": 2.622983668971045e-05, - "loss": 0.6834, - "step": 161330 - }, - { - "epoch": 1.4262982018776853, - "grad_norm": 3.264592409133911, - "learning_rate": 2.622836330203858e-05, - "loss": 0.7281, - "step": 161340 - }, - { - "epoch": 1.4263866051379974, - "grad_norm": 1.5470978021621704, - "learning_rate": 2.6226889914366708e-05, - "loss": 0.618, - "step": 161350 - }, - { - "epoch": 1.4264750083983098, - "grad_norm": 1.203267216682434, - "learning_rate": 2.622541652669484e-05, - "loss": 0.6747, - "step": 161360 - }, - { - "epoch": 1.426563411658622, - "grad_norm": 1.1337255239486694, - "learning_rate": 2.6223943139022968e-05, - "loss": 0.4655, - "step": 161370 - }, - { - "epoch": 1.4266518149189342, - "grad_norm": 4.025588035583496, - "learning_rate": 2.6222469751351096e-05, - "loss": 0.5607, - "step": 161380 - }, - { - "epoch": 1.4267402181792463, - "grad_norm": 0.8390575051307678, - "learning_rate": 2.6220996363679228e-05, - "loss": 0.4946, - "step": 161390 - }, - { - "epoch": 1.4268286214395587, - "grad_norm": 2.9481260776519775, - "learning_rate": 2.6219522976007356e-05, - "loss": 0.533, - "step": 161400 - }, - { - "epoch": 1.426917024699871, - "grad_norm": 3.012434720993042, - "learning_rate": 2.6218049588335485e-05, - "loss": 0.4758, - "step": 161410 - }, - { - "epoch": 1.4270054279601831, - "grad_norm": 1.6087599992752075, - "learning_rate": 2.6216576200663616e-05, - "loss": 0.6749, - "step": 161420 - }, - { - "epoch": 1.4270938312204955, - "grad_norm": 1.224970817565918, - "learning_rate": 2.6215102812991745e-05, - "loss": 0.7121, - "step": 161430 - }, - { - "epoch": 1.4271822344808076, - "grad_norm": 1.0578094720840454, - "learning_rate": 2.6213629425319873e-05, - "loss": 0.5582, - "step": 161440 - }, - { - "epoch": 1.42727063774112, - "grad_norm": 9.980646133422852, - "learning_rate": 2.6212156037648005e-05, - "loss": 0.6451, - "step": 161450 - }, - { - "epoch": 1.427359041001432, - "grad_norm": 2.742506504058838, - "learning_rate": 2.6210682649976133e-05, - "loss": 0.6079, - "step": 161460 - }, - { - "epoch": 1.4274474442617444, - "grad_norm": 3.062483549118042, - "learning_rate": 2.620920926230426e-05, - "loss": 0.4081, - "step": 161470 - }, - { - "epoch": 1.4275358475220565, - "grad_norm": 4.115296840667725, - "learning_rate": 2.620773587463239e-05, - "loss": 0.71, - "step": 161480 - }, - { - "epoch": 1.4276242507823689, - "grad_norm": 2.8837597370147705, - "learning_rate": 2.620626248696052e-05, - "loss": 0.6073, - "step": 161490 - }, - { - "epoch": 1.427712654042681, - "grad_norm": 5.456223964691162, - "learning_rate": 2.620478909928865e-05, - "loss": 0.5854, - "step": 161500 - }, - { - "epoch": 1.4278010573029933, - "grad_norm": 2.9360814094543457, - "learning_rate": 2.620331571161678e-05, - "loss": 0.5733, - "step": 161510 - }, - { - "epoch": 1.4278894605633057, - "grad_norm": 1.341850757598877, - "learning_rate": 2.620184232394491e-05, - "loss": 0.5321, - "step": 161520 - }, - { - "epoch": 1.4279778638236178, - "grad_norm": 2.6372475624084473, - "learning_rate": 2.620036893627304e-05, - "loss": 0.6352, - "step": 161530 - }, - { - "epoch": 1.42806626708393, - "grad_norm": 3.1966259479522705, - "learning_rate": 2.6198895548601167e-05, - "loss": 0.5907, - "step": 161540 - }, - { - "epoch": 1.4281546703442423, - "grad_norm": 4.268582820892334, - "learning_rate": 2.61974221609293e-05, - "loss": 0.666, - "step": 161550 - }, - { - "epoch": 1.4282430736045546, - "grad_norm": 2.418524980545044, - "learning_rate": 2.6195948773257427e-05, - "loss": 0.5888, - "step": 161560 - }, - { - "epoch": 1.4283314768648667, - "grad_norm": 2.0414352416992188, - "learning_rate": 2.6194475385585555e-05, - "loss": 0.5097, - "step": 161570 - }, - { - "epoch": 1.428419880125179, - "grad_norm": 11.2678861618042, - "learning_rate": 2.6193001997913687e-05, - "loss": 0.596, - "step": 161580 - }, - { - "epoch": 1.4285082833854912, - "grad_norm": 12.495491981506348, - "learning_rate": 2.6191528610241812e-05, - "loss": 0.7022, - "step": 161590 - }, - { - "epoch": 1.4285966866458035, - "grad_norm": 2.315694808959961, - "learning_rate": 2.6190055222569944e-05, - "loss": 0.6095, - "step": 161600 - }, - { - "epoch": 1.4286850899061156, - "grad_norm": 3.6961419582366943, - "learning_rate": 2.6188581834898075e-05, - "loss": 0.6796, - "step": 161610 - }, - { - "epoch": 1.428773493166428, - "grad_norm": 2.283215284347534, - "learning_rate": 2.61871084472262e-05, - "loss": 0.5508, - "step": 161620 - }, - { - "epoch": 1.4288618964267403, - "grad_norm": 2.245480537414551, - "learning_rate": 2.6185635059554332e-05, - "loss": 0.6805, - "step": 161630 - }, - { - "epoch": 1.4289502996870524, - "grad_norm": 15.089563369750977, - "learning_rate": 2.6184161671882464e-05, - "loss": 0.6279, - "step": 161640 - }, - { - "epoch": 1.4290387029473646, - "grad_norm": 1.8494623899459839, - "learning_rate": 2.618268828421059e-05, - "loss": 0.6328, - "step": 161650 - }, - { - "epoch": 1.429127106207677, - "grad_norm": 3.1747725009918213, - "learning_rate": 2.618121489653872e-05, - "loss": 0.5979, - "step": 161660 - }, - { - "epoch": 1.4292155094679893, - "grad_norm": 1.5071965456008911, - "learning_rate": 2.6179741508866852e-05, - "loss": 0.5722, - "step": 161670 - }, - { - "epoch": 1.4293039127283014, - "grad_norm": 2.5523529052734375, - "learning_rate": 2.6178268121194977e-05, - "loss": 0.7458, - "step": 161680 - }, - { - "epoch": 1.4293923159886137, - "grad_norm": 3.0697720050811768, - "learning_rate": 2.617679473352311e-05, - "loss": 0.6483, - "step": 161690 - }, - { - "epoch": 1.4294807192489258, - "grad_norm": 6.666508674621582, - "learning_rate": 2.6175321345851234e-05, - "loss": 0.5427, - "step": 161700 - }, - { - "epoch": 1.4295691225092382, - "grad_norm": 4.020336151123047, - "learning_rate": 2.6173847958179366e-05, - "loss": 0.5794, - "step": 161710 - }, - { - "epoch": 1.4296575257695503, - "grad_norm": 3.8209495544433594, - "learning_rate": 2.6172374570507497e-05, - "loss": 0.5047, - "step": 161720 - }, - { - "epoch": 1.4297459290298626, - "grad_norm": 1.0950429439544678, - "learning_rate": 2.6170901182835622e-05, - "loss": 0.6292, - "step": 161730 - }, - { - "epoch": 1.429834332290175, - "grad_norm": 4.650197982788086, - "learning_rate": 2.6169427795163754e-05, - "loss": 0.686, - "step": 161740 - }, - { - "epoch": 1.429922735550487, - "grad_norm": 4.526788711547852, - "learning_rate": 2.6167954407491886e-05, - "loss": 0.5967, - "step": 161750 - }, - { - "epoch": 1.4300111388107992, - "grad_norm": 7.717674732208252, - "learning_rate": 2.616648101982001e-05, - "loss": 0.5825, - "step": 161760 - }, - { - "epoch": 1.4300995420711116, - "grad_norm": 2.3685898780822754, - "learning_rate": 2.6165007632148143e-05, - "loss": 0.6557, - "step": 161770 - }, - { - "epoch": 1.430187945331424, - "grad_norm": 3.4794111251831055, - "learning_rate": 2.6163534244476274e-05, - "loss": 0.5531, - "step": 161780 - }, - { - "epoch": 1.430276348591736, - "grad_norm": 4.8207316398620605, - "learning_rate": 2.61620608568044e-05, - "loss": 0.6737, - "step": 161790 - }, - { - "epoch": 1.4303647518520484, - "grad_norm": 2.528627634048462, - "learning_rate": 2.616058746913253e-05, - "loss": 0.5746, - "step": 161800 - }, - { - "epoch": 1.4304531551123605, - "grad_norm": 2.007368326187134, - "learning_rate": 2.6159114081460656e-05, - "loss": 0.5995, - "step": 161810 - }, - { - "epoch": 1.4305415583726728, - "grad_norm": 1.885964035987854, - "learning_rate": 2.6157640693788788e-05, - "loss": 0.611, - "step": 161820 - }, - { - "epoch": 1.430629961632985, - "grad_norm": 1.7199714183807373, - "learning_rate": 2.615616730611692e-05, - "loss": 0.6451, - "step": 161830 - }, - { - "epoch": 1.4307183648932973, - "grad_norm": 4.746889114379883, - "learning_rate": 2.6154693918445044e-05, - "loss": 0.4711, - "step": 161840 - }, - { - "epoch": 1.4308067681536096, - "grad_norm": 5.606736183166504, - "learning_rate": 2.6153220530773176e-05, - "loss": 0.7349, - "step": 161850 - }, - { - "epoch": 1.4308951714139218, - "grad_norm": 2.159475564956665, - "learning_rate": 2.6151747143101308e-05, - "loss": 0.5479, - "step": 161860 - }, - { - "epoch": 1.4309835746742339, - "grad_norm": 4.461320877075195, - "learning_rate": 2.6150273755429433e-05, - "loss": 0.6135, - "step": 161870 - }, - { - "epoch": 1.4310719779345462, - "grad_norm": 3.7734806537628174, - "learning_rate": 2.6148800367757565e-05, - "loss": 0.6893, - "step": 161880 - }, - { - "epoch": 1.4311603811948586, - "grad_norm": 5.611871719360352, - "learning_rate": 2.6147326980085696e-05, - "loss": 0.4517, - "step": 161890 - }, - { - "epoch": 1.4312487844551707, - "grad_norm": 3.5616016387939453, - "learning_rate": 2.614585359241382e-05, - "loss": 0.6392, - "step": 161900 - }, - { - "epoch": 1.431337187715483, - "grad_norm": 5.073832035064697, - "learning_rate": 2.6144380204741953e-05, - "loss": 0.5417, - "step": 161910 - }, - { - "epoch": 1.4314255909757951, - "grad_norm": 1.7248265743255615, - "learning_rate": 2.6142906817070085e-05, - "loss": 0.5248, - "step": 161920 - }, - { - "epoch": 1.4315139942361075, - "grad_norm": 2.3456459045410156, - "learning_rate": 2.614143342939821e-05, - "loss": 0.539, - "step": 161930 - }, - { - "epoch": 1.4316023974964196, - "grad_norm": 2.0335206985473633, - "learning_rate": 2.613996004172634e-05, - "loss": 0.5889, - "step": 161940 - }, - { - "epoch": 1.431690800756732, - "grad_norm": 5.259879112243652, - "learning_rate": 2.6138486654054467e-05, - "loss": 0.5908, - "step": 161950 - }, - { - "epoch": 1.4317792040170443, - "grad_norm": 1.8193250894546509, - "learning_rate": 2.6137013266382598e-05, - "loss": 0.5954, - "step": 161960 - }, - { - "epoch": 1.4318676072773564, - "grad_norm": 2.533898115158081, - "learning_rate": 2.613553987871073e-05, - "loss": 0.6349, - "step": 161970 - }, - { - "epoch": 1.4319560105376685, - "grad_norm": 7.703001499176025, - "learning_rate": 2.6134066491038855e-05, - "loss": 0.5896, - "step": 161980 - }, - { - "epoch": 1.4320444137979809, - "grad_norm": 4.382285118103027, - "learning_rate": 2.6132593103366987e-05, - "loss": 0.6507, - "step": 161990 - }, - { - "epoch": 1.4321328170582932, - "grad_norm": 6.28700590133667, - "learning_rate": 2.613111971569512e-05, - "loss": 0.5939, - "step": 162000 - }, - { - "epoch": 1.4322212203186053, - "grad_norm": 8.765546798706055, - "learning_rate": 2.6129646328023243e-05, - "loss": 0.597, - "step": 162010 - }, - { - "epoch": 1.4323096235789177, - "grad_norm": 2.822014808654785, - "learning_rate": 2.6128172940351375e-05, - "loss": 0.5538, - "step": 162020 - }, - { - "epoch": 1.4323980268392298, - "grad_norm": 2.5320301055908203, - "learning_rate": 2.6126699552679507e-05, - "loss": 0.545, - "step": 162030 - }, - { - "epoch": 1.4324864300995421, - "grad_norm": 3.5387613773345947, - "learning_rate": 2.6125226165007632e-05, - "loss": 0.5022, - "step": 162040 - }, - { - "epoch": 1.4325748333598542, - "grad_norm": 2.996692657470703, - "learning_rate": 2.6123752777335764e-05, - "loss": 0.6151, - "step": 162050 - }, - { - "epoch": 1.4326632366201666, - "grad_norm": 1.551916241645813, - "learning_rate": 2.612227938966389e-05, - "loss": 0.6087, - "step": 162060 - }, - { - "epoch": 1.4327516398804787, - "grad_norm": 1.2909657955169678, - "learning_rate": 2.612080600199202e-05, - "loss": 0.4813, - "step": 162070 - }, - { - "epoch": 1.432840043140791, - "grad_norm": 2.6515233516693115, - "learning_rate": 2.6119332614320152e-05, - "loss": 0.5572, - "step": 162080 - }, - { - "epoch": 1.4329284464011032, - "grad_norm": 1.394481897354126, - "learning_rate": 2.6117859226648277e-05, - "loss": 0.7136, - "step": 162090 - }, - { - "epoch": 1.4330168496614155, - "grad_norm": 2.159165620803833, - "learning_rate": 2.611638583897641e-05, - "loss": 0.497, - "step": 162100 - }, - { - "epoch": 1.4331052529217279, - "grad_norm": 2.1746418476104736, - "learning_rate": 2.611491245130454e-05, - "loss": 0.5899, - "step": 162110 - }, - { - "epoch": 1.43319365618204, - "grad_norm": 2.519761323928833, - "learning_rate": 2.6113439063632665e-05, - "loss": 0.5736, - "step": 162120 - }, - { - "epoch": 1.433282059442352, - "grad_norm": 2.9637722969055176, - "learning_rate": 2.6111965675960797e-05, - "loss": 0.6743, - "step": 162130 - }, - { - "epoch": 1.4333704627026644, - "grad_norm": 1.2997210025787354, - "learning_rate": 2.611049228828893e-05, - "loss": 0.5524, - "step": 162140 - }, - { - "epoch": 1.4334588659629768, - "grad_norm": 9.77399730682373, - "learning_rate": 2.6109018900617054e-05, - "loss": 0.6186, - "step": 162150 - }, - { - "epoch": 1.433547269223289, - "grad_norm": 1.4343018531799316, - "learning_rate": 2.6107545512945186e-05, - "loss": 0.5757, - "step": 162160 - }, - { - "epoch": 1.4336356724836012, - "grad_norm": 1.8964042663574219, - "learning_rate": 2.610607212527331e-05, - "loss": 0.678, - "step": 162170 - }, - { - "epoch": 1.4337240757439134, - "grad_norm": 2.3213462829589844, - "learning_rate": 2.6104598737601442e-05, - "loss": 0.5234, - "step": 162180 - }, - { - "epoch": 1.4338124790042257, - "grad_norm": 1.0815372467041016, - "learning_rate": 2.6103125349929574e-05, - "loss": 0.6681, - "step": 162190 - }, - { - "epoch": 1.4339008822645378, - "grad_norm": 2.6626393795013428, - "learning_rate": 2.61016519622577e-05, - "loss": 0.5253, - "step": 162200 - }, - { - "epoch": 1.4339892855248502, - "grad_norm": 1.4560598134994507, - "learning_rate": 2.610017857458583e-05, - "loss": 0.5838, - "step": 162210 - }, - { - "epoch": 1.4340776887851625, - "grad_norm": 1.633857011795044, - "learning_rate": 2.6098705186913963e-05, - "loss": 0.6727, - "step": 162220 - }, - { - "epoch": 1.4341660920454746, - "grad_norm": 3.4809625148773193, - "learning_rate": 2.6097231799242088e-05, - "loss": 0.6221, - "step": 162230 - }, - { - "epoch": 1.4342544953057867, - "grad_norm": 6.129171848297119, - "learning_rate": 2.609575841157022e-05, - "loss": 0.6702, - "step": 162240 - }, - { - "epoch": 1.434342898566099, - "grad_norm": 8.678262710571289, - "learning_rate": 2.609428502389835e-05, - "loss": 0.7219, - "step": 162250 - }, - { - "epoch": 1.4344313018264114, - "grad_norm": 3.63297700881958, - "learning_rate": 2.6092811636226476e-05, - "loss": 0.5457, - "step": 162260 - }, - { - "epoch": 1.4345197050867236, - "grad_norm": 3.3888328075408936, - "learning_rate": 2.6091338248554608e-05, - "loss": 0.554, - "step": 162270 - }, - { - "epoch": 1.434608108347036, - "grad_norm": 1.793877124786377, - "learning_rate": 2.6089864860882736e-05, - "loss": 0.5358, - "step": 162280 - }, - { - "epoch": 1.434696511607348, - "grad_norm": 3.278965473175049, - "learning_rate": 2.6088391473210864e-05, - "loss": 0.6237, - "step": 162290 - }, - { - "epoch": 1.4347849148676604, - "grad_norm": 1.1085797548294067, - "learning_rate": 2.6086918085538996e-05, - "loss": 0.6651, - "step": 162300 - }, - { - "epoch": 1.4348733181279725, - "grad_norm": 4.023129940032959, - "learning_rate": 2.6085444697867125e-05, - "loss": 0.6262, - "step": 162310 - }, - { - "epoch": 1.4349617213882848, - "grad_norm": 8.151616096496582, - "learning_rate": 2.6083971310195253e-05, - "loss": 0.5932, - "step": 162320 - }, - { - "epoch": 1.4350501246485972, - "grad_norm": 2.363712787628174, - "learning_rate": 2.6082497922523385e-05, - "loss": 0.5092, - "step": 162330 - }, - { - "epoch": 1.4351385279089093, - "grad_norm": 1.5764716863632202, - "learning_rate": 2.6081024534851513e-05, - "loss": 0.5242, - "step": 162340 - }, - { - "epoch": 1.4352269311692214, - "grad_norm": 1.7375887632369995, - "learning_rate": 2.607955114717964e-05, - "loss": 0.6371, - "step": 162350 - }, - { - "epoch": 1.4353153344295337, - "grad_norm": 4.226534366607666, - "learning_rate": 2.6078077759507773e-05, - "loss": 0.4833, - "step": 162360 - }, - { - "epoch": 1.435403737689846, - "grad_norm": 3.9281508922576904, - "learning_rate": 2.60766043718359e-05, - "loss": 0.5534, - "step": 162370 - }, - { - "epoch": 1.4354921409501582, - "grad_norm": 9.115564346313477, - "learning_rate": 2.607513098416403e-05, - "loss": 0.5556, - "step": 162380 - }, - { - "epoch": 1.4355805442104705, - "grad_norm": 7.714043617248535, - "learning_rate": 2.607365759649216e-05, - "loss": 0.4868, - "step": 162390 - }, - { - "epoch": 1.4356689474707827, - "grad_norm": 1.9511128664016724, - "learning_rate": 2.607218420882029e-05, - "loss": 0.5432, - "step": 162400 - }, - { - "epoch": 1.435757350731095, - "grad_norm": 1.6880097389221191, - "learning_rate": 2.6070710821148418e-05, - "loss": 0.6266, - "step": 162410 - }, - { - "epoch": 1.4358457539914071, - "grad_norm": 1.64362633228302, - "learning_rate": 2.6069237433476547e-05, - "loss": 0.6547, - "step": 162420 - }, - { - "epoch": 1.4359341572517195, - "grad_norm": 2.097661018371582, - "learning_rate": 2.606776404580468e-05, - "loss": 0.6609, - "step": 162430 - }, - { - "epoch": 1.4360225605120318, - "grad_norm": 1.2369998693466187, - "learning_rate": 2.6066290658132807e-05, - "loss": 0.5857, - "step": 162440 - }, - { - "epoch": 1.436110963772344, - "grad_norm": 4.781394958496094, - "learning_rate": 2.6064817270460935e-05, - "loss": 0.6015, - "step": 162450 - }, - { - "epoch": 1.436199367032656, - "grad_norm": 6.276465892791748, - "learning_rate": 2.6063343882789067e-05, - "loss": 0.5349, - "step": 162460 - }, - { - "epoch": 1.4362877702929684, - "grad_norm": 6.646275043487549, - "learning_rate": 2.6061870495117195e-05, - "loss": 0.5763, - "step": 162470 - }, - { - "epoch": 1.4363761735532807, - "grad_norm": 7.209773540496826, - "learning_rate": 2.6060397107445324e-05, - "loss": 0.5422, - "step": 162480 - }, - { - "epoch": 1.4364645768135929, - "grad_norm": 2.5789430141448975, - "learning_rate": 2.6058923719773455e-05, - "loss": 0.6021, - "step": 162490 - }, - { - "epoch": 1.4365529800739052, - "grad_norm": 3.2698092460632324, - "learning_rate": 2.6057450332101584e-05, - "loss": 0.6449, - "step": 162500 - }, - { - "epoch": 1.4366413833342173, - "grad_norm": 3.841197967529297, - "learning_rate": 2.6055976944429712e-05, - "loss": 0.6016, - "step": 162510 - }, - { - "epoch": 1.4367297865945297, - "grad_norm": 2.9219274520874023, - "learning_rate": 2.6054503556757844e-05, - "loss": 0.6316, - "step": 162520 - }, - { - "epoch": 1.4368181898548418, - "grad_norm": 4.1972503662109375, - "learning_rate": 2.605303016908597e-05, - "loss": 0.5689, - "step": 162530 - }, - { - "epoch": 1.4369065931151541, - "grad_norm": 2.036928415298462, - "learning_rate": 2.60515567814141e-05, - "loss": 0.5514, - "step": 162540 - }, - { - "epoch": 1.4369949963754665, - "grad_norm": 1.4042549133300781, - "learning_rate": 2.6050083393742232e-05, - "loss": 0.6013, - "step": 162550 - }, - { - "epoch": 1.4370833996357786, - "grad_norm": 2.5507240295410156, - "learning_rate": 2.6048610006070357e-05, - "loss": 0.5252, - "step": 162560 - }, - { - "epoch": 1.4371718028960907, - "grad_norm": 14.307161331176758, - "learning_rate": 2.604713661839849e-05, - "loss": 0.6689, - "step": 162570 - }, - { - "epoch": 1.437260206156403, - "grad_norm": 2.6624224185943604, - "learning_rate": 2.604566323072662e-05, - "loss": 0.5598, - "step": 162580 - }, - { - "epoch": 1.4373486094167154, - "grad_norm": 8.971811294555664, - "learning_rate": 2.6044189843054746e-05, - "loss": 0.5781, - "step": 162590 - }, - { - "epoch": 1.4374370126770275, - "grad_norm": 6.02656364440918, - "learning_rate": 2.6042716455382877e-05, - "loss": 0.5422, - "step": 162600 - }, - { - "epoch": 1.4375254159373398, - "grad_norm": 1.2006080150604248, - "learning_rate": 2.604124306771101e-05, - "loss": 0.6252, - "step": 162610 - }, - { - "epoch": 1.437613819197652, - "grad_norm": 3.102445602416992, - "learning_rate": 2.6039769680039134e-05, - "loss": 0.6512, - "step": 162620 - }, - { - "epoch": 1.4377022224579643, - "grad_norm": 5.744995594024658, - "learning_rate": 2.6038296292367266e-05, - "loss": 0.4888, - "step": 162630 - }, - { - "epoch": 1.4377906257182764, - "grad_norm": 1.6488481760025024, - "learning_rate": 2.603682290469539e-05, - "loss": 0.6474, - "step": 162640 - }, - { - "epoch": 1.4378790289785888, - "grad_norm": 14.50014591217041, - "learning_rate": 2.6035349517023522e-05, - "loss": 0.5481, - "step": 162650 - }, - { - "epoch": 1.4379674322389009, - "grad_norm": 1.5166046619415283, - "learning_rate": 2.6033876129351654e-05, - "loss": 0.5951, - "step": 162660 - }, - { - "epoch": 1.4380558354992132, - "grad_norm": 2.7222585678100586, - "learning_rate": 2.603240274167978e-05, - "loss": 0.5437, - "step": 162670 - }, - { - "epoch": 1.4381442387595254, - "grad_norm": 1.432569980621338, - "learning_rate": 2.603092935400791e-05, - "loss": 0.5863, - "step": 162680 - }, - { - "epoch": 1.4382326420198377, - "grad_norm": 3.668592691421509, - "learning_rate": 2.6029455966336043e-05, - "loss": 0.5204, - "step": 162690 - }, - { - "epoch": 1.43832104528015, - "grad_norm": 1.389432668685913, - "learning_rate": 2.6027982578664168e-05, - "loss": 0.6487, - "step": 162700 - }, - { - "epoch": 1.4384094485404622, - "grad_norm": 2.746288299560547, - "learning_rate": 2.60265091909923e-05, - "loss": 0.7084, - "step": 162710 - }, - { - "epoch": 1.4384978518007743, - "grad_norm": 3.2672362327575684, - "learning_rate": 2.602503580332043e-05, - "loss": 0.7816, - "step": 162720 - }, - { - "epoch": 1.4385862550610866, - "grad_norm": 12.6722412109375, - "learning_rate": 2.6023562415648556e-05, - "loss": 0.5416, - "step": 162730 - }, - { - "epoch": 1.438674658321399, - "grad_norm": 5.013515949249268, - "learning_rate": 2.6022089027976688e-05, - "loss": 0.5489, - "step": 162740 - }, - { - "epoch": 1.438763061581711, - "grad_norm": 2.7230336666107178, - "learning_rate": 2.6020615640304813e-05, - "loss": 0.525, - "step": 162750 - }, - { - "epoch": 1.4388514648420234, - "grad_norm": 5.559991836547852, - "learning_rate": 2.6019142252632945e-05, - "loss": 0.6051, - "step": 162760 - }, - { - "epoch": 1.4389398681023355, - "grad_norm": 6.633627891540527, - "learning_rate": 2.6017668864961076e-05, - "loss": 0.5731, - "step": 162770 - }, - { - "epoch": 1.4390282713626479, - "grad_norm": 2.0652992725372314, - "learning_rate": 2.60161954772892e-05, - "loss": 0.5908, - "step": 162780 - }, - { - "epoch": 1.43911667462296, - "grad_norm": 1.779591679573059, - "learning_rate": 2.6014722089617333e-05, - "loss": 0.4873, - "step": 162790 - }, - { - "epoch": 1.4392050778832723, - "grad_norm": 1.1020110845565796, - "learning_rate": 2.6013248701945465e-05, - "loss": 0.6138, - "step": 162800 - }, - { - "epoch": 1.4392934811435847, - "grad_norm": 0.9076933264732361, - "learning_rate": 2.601177531427359e-05, - "loss": 0.6028, - "step": 162810 - }, - { - "epoch": 1.4393818844038968, - "grad_norm": 2.0878868103027344, - "learning_rate": 2.601030192660172e-05, - "loss": 0.5928, - "step": 162820 - }, - { - "epoch": 1.439470287664209, - "grad_norm": 2.130429744720459, - "learning_rate": 2.6008828538929853e-05, - "loss": 0.5192, - "step": 162830 - }, - { - "epoch": 1.4395586909245213, - "grad_norm": 1.096369981765747, - "learning_rate": 2.6007355151257978e-05, - "loss": 0.5802, - "step": 162840 - }, - { - "epoch": 1.4396470941848336, - "grad_norm": 3.403939962387085, - "learning_rate": 2.600588176358611e-05, - "loss": 0.5427, - "step": 162850 - }, - { - "epoch": 1.4397354974451457, - "grad_norm": 3.4605319499969482, - "learning_rate": 2.600440837591424e-05, - "loss": 0.658, - "step": 162860 - }, - { - "epoch": 1.439823900705458, - "grad_norm": 1.0626569986343384, - "learning_rate": 2.6002934988242367e-05, - "loss": 0.4823, - "step": 162870 - }, - { - "epoch": 1.4399123039657702, - "grad_norm": 3.51475191116333, - "learning_rate": 2.60014616005705e-05, - "loss": 0.634, - "step": 162880 - }, - { - "epoch": 1.4400007072260825, - "grad_norm": 4.519690990447998, - "learning_rate": 2.5999988212898623e-05, - "loss": 0.6418, - "step": 162890 - }, - { - "epoch": 1.4400891104863947, - "grad_norm": 4.688043594360352, - "learning_rate": 2.5998514825226755e-05, - "loss": 0.6243, - "step": 162900 - }, - { - "epoch": 1.440177513746707, - "grad_norm": 3.764296293258667, - "learning_rate": 2.5997041437554887e-05, - "loss": 0.5926, - "step": 162910 - }, - { - "epoch": 1.4402659170070193, - "grad_norm": 1.5430827140808105, - "learning_rate": 2.5995568049883012e-05, - "loss": 0.73, - "step": 162920 - }, - { - "epoch": 1.4403543202673315, - "grad_norm": 1.4045923948287964, - "learning_rate": 2.5994094662211143e-05, - "loss": 0.56, - "step": 162930 - }, - { - "epoch": 1.4404427235276436, - "grad_norm": 11.848942756652832, - "learning_rate": 2.5992621274539275e-05, - "loss": 0.6137, - "step": 162940 - }, - { - "epoch": 1.440531126787956, - "grad_norm": 3.742100954055786, - "learning_rate": 2.59911478868674e-05, - "loss": 0.6206, - "step": 162950 - }, - { - "epoch": 1.4406195300482683, - "grad_norm": 1.1643280982971191, - "learning_rate": 2.5989674499195532e-05, - "loss": 0.5386, - "step": 162960 - }, - { - "epoch": 1.4407079333085804, - "grad_norm": 2.6120107173919678, - "learning_rate": 2.5988201111523664e-05, - "loss": 0.537, - "step": 162970 - }, - { - "epoch": 1.4407963365688927, - "grad_norm": 1.7390090227127075, - "learning_rate": 2.598672772385179e-05, - "loss": 0.5117, - "step": 162980 - }, - { - "epoch": 1.4408847398292048, - "grad_norm": 1.3304662704467773, - "learning_rate": 2.598525433617992e-05, - "loss": 0.4873, - "step": 162990 - }, - { - "epoch": 1.4409731430895172, - "grad_norm": 7.268707752227783, - "learning_rate": 2.5983780948508045e-05, - "loss": 0.6188, - "step": 163000 - }, - { - "epoch": 1.4410615463498293, - "grad_norm": 1.9802719354629517, - "learning_rate": 2.5982307560836177e-05, - "loss": 0.6798, - "step": 163010 - }, - { - "epoch": 1.4411499496101416, - "grad_norm": 2.593223810195923, - "learning_rate": 2.598083417316431e-05, - "loss": 0.5599, - "step": 163020 - }, - { - "epoch": 1.441238352870454, - "grad_norm": 6.6110334396362305, - "learning_rate": 2.5979360785492434e-05, - "loss": 0.7445, - "step": 163030 - }, - { - "epoch": 1.441326756130766, - "grad_norm": 1.256464958190918, - "learning_rate": 2.5977887397820566e-05, - "loss": 0.4669, - "step": 163040 - }, - { - "epoch": 1.4414151593910782, - "grad_norm": 1.5307859182357788, - "learning_rate": 2.5976414010148697e-05, - "loss": 0.5798, - "step": 163050 - }, - { - "epoch": 1.4415035626513906, - "grad_norm": 2.0247325897216797, - "learning_rate": 2.5974940622476822e-05, - "loss": 0.6276, - "step": 163060 - }, - { - "epoch": 1.441591965911703, - "grad_norm": 17.28339385986328, - "learning_rate": 2.5973467234804954e-05, - "loss": 0.5822, - "step": 163070 - }, - { - "epoch": 1.441680369172015, - "grad_norm": 2.3798680305480957, - "learning_rate": 2.5971993847133086e-05, - "loss": 0.4811, - "step": 163080 - }, - { - "epoch": 1.4417687724323274, - "grad_norm": 2.943125009536743, - "learning_rate": 2.597052045946121e-05, - "loss": 0.641, - "step": 163090 - }, - { - "epoch": 1.4418571756926395, - "grad_norm": 2.5619585514068604, - "learning_rate": 2.5969047071789342e-05, - "loss": 0.5306, - "step": 163100 - }, - { - "epoch": 1.4419455789529518, - "grad_norm": 1.1987885236740112, - "learning_rate": 2.5967573684117467e-05, - "loss": 0.5237, - "step": 163110 - }, - { - "epoch": 1.442033982213264, - "grad_norm": 5.159473419189453, - "learning_rate": 2.59661002964456e-05, - "loss": 0.6278, - "step": 163120 - }, - { - "epoch": 1.4421223854735763, - "grad_norm": 8.181326866149902, - "learning_rate": 2.596462690877373e-05, - "loss": 0.6036, - "step": 163130 - }, - { - "epoch": 1.4422107887338886, - "grad_norm": 2.690187692642212, - "learning_rate": 2.5963153521101856e-05, - "loss": 0.5328, - "step": 163140 - }, - { - "epoch": 1.4422991919942008, - "grad_norm": 1.867153525352478, - "learning_rate": 2.5961680133429988e-05, - "loss": 0.465, - "step": 163150 - }, - { - "epoch": 1.4423875952545129, - "grad_norm": 7.317404270172119, - "learning_rate": 2.596020674575812e-05, - "loss": 0.5974, - "step": 163160 - }, - { - "epoch": 1.4424759985148252, - "grad_norm": 3.129351854324341, - "learning_rate": 2.5958733358086244e-05, - "loss": 0.8205, - "step": 163170 - }, - { - "epoch": 1.4425644017751376, - "grad_norm": 1.3179301023483276, - "learning_rate": 2.5957259970414376e-05, - "loss": 0.6368, - "step": 163180 - }, - { - "epoch": 1.4426528050354497, - "grad_norm": 10.699811935424805, - "learning_rate": 2.5955786582742508e-05, - "loss": 0.5653, - "step": 163190 - }, - { - "epoch": 1.442741208295762, - "grad_norm": 6.10602331161499, - "learning_rate": 2.5954313195070633e-05, - "loss": 0.5601, - "step": 163200 - }, - { - "epoch": 1.4428296115560741, - "grad_norm": 10.145916938781738, - "learning_rate": 2.5952839807398764e-05, - "loss": 0.5393, - "step": 163210 - }, - { - "epoch": 1.4429180148163865, - "grad_norm": 3.739583730697632, - "learning_rate": 2.5951366419726896e-05, - "loss": 0.5284, - "step": 163220 - }, - { - "epoch": 1.4430064180766986, - "grad_norm": 6.051761150360107, - "learning_rate": 2.594989303205502e-05, - "loss": 0.5538, - "step": 163230 - }, - { - "epoch": 1.443094821337011, - "grad_norm": 1.2931458950042725, - "learning_rate": 2.5948419644383153e-05, - "loss": 0.6153, - "step": 163240 - }, - { - "epoch": 1.443183224597323, - "grad_norm": 18.104881286621094, - "learning_rate": 2.594694625671128e-05, - "loss": 0.6126, - "step": 163250 - }, - { - "epoch": 1.4432716278576354, - "grad_norm": 5.546779632568359, - "learning_rate": 2.594547286903941e-05, - "loss": 0.7233, - "step": 163260 - }, - { - "epoch": 1.4433600311179475, - "grad_norm": 2.75102162361145, - "learning_rate": 2.594399948136754e-05, - "loss": 0.7219, - "step": 163270 - }, - { - "epoch": 1.4434484343782599, - "grad_norm": 2.815718650817871, - "learning_rate": 2.594252609369567e-05, - "loss": 0.5152, - "step": 163280 - }, - { - "epoch": 1.4435368376385722, - "grad_norm": 4.630425930023193, - "learning_rate": 2.5941052706023798e-05, - "loss": 0.7018, - "step": 163290 - }, - { - "epoch": 1.4436252408988843, - "grad_norm": 3.211033344268799, - "learning_rate": 2.593957931835193e-05, - "loss": 0.5249, - "step": 163300 - }, - { - "epoch": 1.4437136441591967, - "grad_norm": 1.5600229501724243, - "learning_rate": 2.5938105930680058e-05, - "loss": 0.496, - "step": 163310 - }, - { - "epoch": 1.4438020474195088, - "grad_norm": 1.2481801509857178, - "learning_rate": 2.5936632543008187e-05, - "loss": 0.5194, - "step": 163320 - }, - { - "epoch": 1.4438904506798211, - "grad_norm": 2.3871798515319824, - "learning_rate": 2.5935159155336318e-05, - "loss": 0.6149, - "step": 163330 - }, - { - "epoch": 1.4439788539401333, - "grad_norm": 2.9217076301574707, - "learning_rate": 2.5933685767664447e-05, - "loss": 0.6472, - "step": 163340 - }, - { - "epoch": 1.4440672572004456, - "grad_norm": 2.064652442932129, - "learning_rate": 2.5932212379992575e-05, - "loss": 0.7448, - "step": 163350 - }, - { - "epoch": 1.4441556604607577, - "grad_norm": 4.032621383666992, - "learning_rate": 2.5930738992320703e-05, - "loss": 0.6343, - "step": 163360 - }, - { - "epoch": 1.44424406372107, - "grad_norm": 5.0219502449035645, - "learning_rate": 2.5929265604648835e-05, - "loss": 0.5416, - "step": 163370 - }, - { - "epoch": 1.4443324669813822, - "grad_norm": 1.3431707620620728, - "learning_rate": 2.5927792216976963e-05, - "loss": 0.5947, - "step": 163380 - }, - { - "epoch": 1.4444208702416945, - "grad_norm": 5.522706508636475, - "learning_rate": 2.5926318829305092e-05, - "loss": 0.6294, - "step": 163390 - }, - { - "epoch": 1.4445092735020069, - "grad_norm": 3.8069798946380615, - "learning_rate": 2.5924845441633224e-05, - "loss": 0.695, - "step": 163400 - }, - { - "epoch": 1.444597676762319, - "grad_norm": 3.3847084045410156, - "learning_rate": 2.5923372053961352e-05, - "loss": 0.5162, - "step": 163410 - }, - { - "epoch": 1.444686080022631, - "grad_norm": 1.954024314880371, - "learning_rate": 2.592189866628948e-05, - "loss": 0.5632, - "step": 163420 - }, - { - "epoch": 1.4447744832829434, - "grad_norm": 5.948233604431152, - "learning_rate": 2.5920425278617612e-05, - "loss": 0.5513, - "step": 163430 - }, - { - "epoch": 1.4448628865432558, - "grad_norm": 2.3392529487609863, - "learning_rate": 2.591895189094574e-05, - "loss": 0.5386, - "step": 163440 - }, - { - "epoch": 1.444951289803568, - "grad_norm": 1.4230355024337769, - "learning_rate": 2.591747850327387e-05, - "loss": 0.592, - "step": 163450 - }, - { - "epoch": 1.4450396930638802, - "grad_norm": 1.8998687267303467, - "learning_rate": 2.5916005115602e-05, - "loss": 0.5133, - "step": 163460 - }, - { - "epoch": 1.4451280963241924, - "grad_norm": 5.14849853515625, - "learning_rate": 2.5914531727930125e-05, - "loss": 0.614, - "step": 163470 - }, - { - "epoch": 1.4452164995845047, - "grad_norm": 5.489749908447266, - "learning_rate": 2.5913058340258257e-05, - "loss": 0.5105, - "step": 163480 - }, - { - "epoch": 1.4453049028448168, - "grad_norm": 1.4034628868103027, - "learning_rate": 2.591158495258639e-05, - "loss": 0.5288, - "step": 163490 - }, - { - "epoch": 1.4453933061051292, - "grad_norm": 17.642122268676758, - "learning_rate": 2.5910111564914514e-05, - "loss": 0.6285, - "step": 163500 - }, - { - "epoch": 1.4454817093654415, - "grad_norm": 4.625466823577881, - "learning_rate": 2.5908638177242646e-05, - "loss": 0.7202, - "step": 163510 - }, - { - "epoch": 1.4455701126257536, - "grad_norm": 2.3592336177825928, - "learning_rate": 2.5907164789570777e-05, - "loss": 0.5312, - "step": 163520 - }, - { - "epoch": 1.4456585158860658, - "grad_norm": 4.763549327850342, - "learning_rate": 2.5905691401898902e-05, - "loss": 0.6423, - "step": 163530 - }, - { - "epoch": 1.445746919146378, - "grad_norm": 6.493288993835449, - "learning_rate": 2.5904218014227034e-05, - "loss": 0.7212, - "step": 163540 - }, - { - "epoch": 1.4458353224066904, - "grad_norm": 11.763323783874512, - "learning_rate": 2.5902744626555166e-05, - "loss": 0.6255, - "step": 163550 - }, - { - "epoch": 1.4459237256670026, - "grad_norm": 4.125518321990967, - "learning_rate": 2.590127123888329e-05, - "loss": 0.5503, - "step": 163560 - }, - { - "epoch": 1.446012128927315, - "grad_norm": 4.00100564956665, - "learning_rate": 2.5899797851211423e-05, - "loss": 0.5956, - "step": 163570 - }, - { - "epoch": 1.446100532187627, - "grad_norm": 5.836422443389893, - "learning_rate": 2.5898324463539547e-05, - "loss": 0.5177, - "step": 163580 - }, - { - "epoch": 1.4461889354479394, - "grad_norm": 2.6270346641540527, - "learning_rate": 2.589685107586768e-05, - "loss": 0.554, - "step": 163590 - }, - { - "epoch": 1.4462773387082515, - "grad_norm": 4.215670108795166, - "learning_rate": 2.589537768819581e-05, - "loss": 0.5933, - "step": 163600 - }, - { - "epoch": 1.4463657419685638, - "grad_norm": 1.8026701211929321, - "learning_rate": 2.5893904300523936e-05, - "loss": 0.6172, - "step": 163610 - }, - { - "epoch": 1.4464541452288762, - "grad_norm": 6.321165084838867, - "learning_rate": 2.5892430912852068e-05, - "loss": 0.5316, - "step": 163620 - }, - { - "epoch": 1.4465425484891883, - "grad_norm": 2.622074842453003, - "learning_rate": 2.58909575251802e-05, - "loss": 0.5677, - "step": 163630 - }, - { - "epoch": 1.4466309517495004, - "grad_norm": 3.058978319168091, - "learning_rate": 2.5889484137508324e-05, - "loss": 0.6025, - "step": 163640 - }, - { - "epoch": 1.4467193550098127, - "grad_norm": 1.5226470232009888, - "learning_rate": 2.5888010749836456e-05, - "loss": 0.5846, - "step": 163650 - }, - { - "epoch": 1.446807758270125, - "grad_norm": 1.815367579460144, - "learning_rate": 2.5886537362164588e-05, - "loss": 0.5233, - "step": 163660 - }, - { - "epoch": 1.4468961615304372, - "grad_norm": 0.8955591320991516, - "learning_rate": 2.5885063974492713e-05, - "loss": 0.4743, - "step": 163670 - }, - { - "epoch": 1.4469845647907496, - "grad_norm": 13.716712951660156, - "learning_rate": 2.5883590586820845e-05, - "loss": 0.7029, - "step": 163680 - }, - { - "epoch": 1.4470729680510617, - "grad_norm": 2.3181943893432617, - "learning_rate": 2.5882117199148976e-05, - "loss": 0.6114, - "step": 163690 - }, - { - "epoch": 1.447161371311374, - "grad_norm": 1.5615142583847046, - "learning_rate": 2.58806438114771e-05, - "loss": 0.6424, - "step": 163700 - }, - { - "epoch": 1.4472497745716861, - "grad_norm": 2.9645166397094727, - "learning_rate": 2.5879170423805233e-05, - "loss": 0.6658, - "step": 163710 - }, - { - "epoch": 1.4473381778319985, - "grad_norm": 4.734382629394531, - "learning_rate": 2.5877697036133358e-05, - "loss": 0.5654, - "step": 163720 - }, - { - "epoch": 1.4474265810923108, - "grad_norm": 2.744168519973755, - "learning_rate": 2.587622364846149e-05, - "loss": 0.6321, - "step": 163730 - }, - { - "epoch": 1.447514984352623, - "grad_norm": 2.9295029640197754, - "learning_rate": 2.587475026078962e-05, - "loss": 0.6988, - "step": 163740 - }, - { - "epoch": 1.447603387612935, - "grad_norm": 3.176943302154541, - "learning_rate": 2.5873276873117746e-05, - "loss": 0.643, - "step": 163750 - }, - { - "epoch": 1.4476917908732474, - "grad_norm": 1.8153252601623535, - "learning_rate": 2.5871803485445878e-05, - "loss": 0.5405, - "step": 163760 - }, - { - "epoch": 1.4477801941335597, - "grad_norm": 1.4959596395492554, - "learning_rate": 2.587033009777401e-05, - "loss": 0.6259, - "step": 163770 - }, - { - "epoch": 1.4478685973938719, - "grad_norm": 2.9318108558654785, - "learning_rate": 2.5868856710102135e-05, - "loss": 0.5346, - "step": 163780 - }, - { - "epoch": 1.4479570006541842, - "grad_norm": 2.953251838684082, - "learning_rate": 2.5867383322430267e-05, - "loss": 0.5994, - "step": 163790 - }, - { - "epoch": 1.4480454039144963, - "grad_norm": 4.248293876647949, - "learning_rate": 2.58659099347584e-05, - "loss": 0.6384, - "step": 163800 - }, - { - "epoch": 1.4481338071748087, - "grad_norm": 2.5765395164489746, - "learning_rate": 2.5864436547086523e-05, - "loss": 0.5957, - "step": 163810 - }, - { - "epoch": 1.4482222104351208, - "grad_norm": 7.987663269042969, - "learning_rate": 2.5862963159414655e-05, - "loss": 0.6086, - "step": 163820 - }, - { - "epoch": 1.4483106136954331, - "grad_norm": 1.3870084285736084, - "learning_rate": 2.586148977174278e-05, - "loss": 0.6118, - "step": 163830 - }, - { - "epoch": 1.4483990169557455, - "grad_norm": 1.799506425857544, - "learning_rate": 2.5860016384070912e-05, - "loss": 0.5134, - "step": 163840 - }, - { - "epoch": 1.4484874202160576, - "grad_norm": 3.09377384185791, - "learning_rate": 2.5858542996399044e-05, - "loss": 0.4939, - "step": 163850 - }, - { - "epoch": 1.4485758234763697, - "grad_norm": 2.490576982498169, - "learning_rate": 2.585706960872717e-05, - "loss": 0.6821, - "step": 163860 - }, - { - "epoch": 1.448664226736682, - "grad_norm": 2.213803768157959, - "learning_rate": 2.58555962210553e-05, - "loss": 0.5501, - "step": 163870 - }, - { - "epoch": 1.4487526299969944, - "grad_norm": 8.636711120605469, - "learning_rate": 2.5854122833383432e-05, - "loss": 0.5457, - "step": 163880 - }, - { - "epoch": 1.4488410332573065, - "grad_norm": 2.0155556201934814, - "learning_rate": 2.5852649445711557e-05, - "loss": 0.4364, - "step": 163890 - }, - { - "epoch": 1.4489294365176189, - "grad_norm": 6.429178237915039, - "learning_rate": 2.585117605803969e-05, - "loss": 0.5651, - "step": 163900 - }, - { - "epoch": 1.449017839777931, - "grad_norm": 5.18787956237793, - "learning_rate": 2.584970267036782e-05, - "loss": 0.659, - "step": 163910 - }, - { - "epoch": 1.4491062430382433, - "grad_norm": 6.4320387840271, - "learning_rate": 2.5848229282695945e-05, - "loss": 0.4547, - "step": 163920 - }, - { - "epoch": 1.4491946462985554, - "grad_norm": 3.056828022003174, - "learning_rate": 2.5846755895024077e-05, - "loss": 0.5395, - "step": 163930 - }, - { - "epoch": 1.4492830495588678, - "grad_norm": 1.1863996982574463, - "learning_rate": 2.5845282507352202e-05, - "loss": 0.4899, - "step": 163940 - }, - { - "epoch": 1.44937145281918, - "grad_norm": 2.12358021736145, - "learning_rate": 2.5843809119680334e-05, - "loss": 0.5816, - "step": 163950 - }, - { - "epoch": 1.4494598560794922, - "grad_norm": 4.030679225921631, - "learning_rate": 2.5842335732008466e-05, - "loss": 0.6144, - "step": 163960 - }, - { - "epoch": 1.4495482593398044, - "grad_norm": 1.533742070198059, - "learning_rate": 2.584086234433659e-05, - "loss": 0.5092, - "step": 163970 - }, - { - "epoch": 1.4496366626001167, - "grad_norm": 3.387834072113037, - "learning_rate": 2.5839388956664722e-05, - "loss": 0.7097, - "step": 163980 - }, - { - "epoch": 1.449725065860429, - "grad_norm": 1.3237627744674683, - "learning_rate": 2.5837915568992854e-05, - "loss": 0.5897, - "step": 163990 - }, - { - "epoch": 1.4498134691207412, - "grad_norm": 1.4648076295852661, - "learning_rate": 2.583644218132098e-05, - "loss": 0.7495, - "step": 164000 - }, - { - "epoch": 1.4499018723810533, - "grad_norm": 3.0642778873443604, - "learning_rate": 2.583496879364911e-05, - "loss": 0.5616, - "step": 164010 - }, - { - "epoch": 1.4499902756413656, - "grad_norm": 3.9176270961761475, - "learning_rate": 2.5833495405977242e-05, - "loss": 0.6663, - "step": 164020 - }, - { - "epoch": 1.450078678901678, - "grad_norm": 17.731969833374023, - "learning_rate": 2.5832022018305367e-05, - "loss": 0.5695, - "step": 164030 - }, - { - "epoch": 1.45016708216199, - "grad_norm": 2.0136289596557617, - "learning_rate": 2.58305486306335e-05, - "loss": 0.6485, - "step": 164040 - }, - { - "epoch": 1.4502554854223024, - "grad_norm": 1.26592218875885, - "learning_rate": 2.5829075242961624e-05, - "loss": 0.4891, - "step": 164050 - }, - { - "epoch": 1.4503438886826145, - "grad_norm": 6.124168395996094, - "learning_rate": 2.5827601855289756e-05, - "loss": 0.5678, - "step": 164060 - }, - { - "epoch": 1.4504322919429269, - "grad_norm": 4.74151611328125, - "learning_rate": 2.5826128467617888e-05, - "loss": 0.6572, - "step": 164070 - }, - { - "epoch": 1.450520695203239, - "grad_norm": 3.4685003757476807, - "learning_rate": 2.5824655079946013e-05, - "loss": 0.5702, - "step": 164080 - }, - { - "epoch": 1.4506090984635513, - "grad_norm": 2.7056164741516113, - "learning_rate": 2.5823181692274144e-05, - "loss": 0.5537, - "step": 164090 - }, - { - "epoch": 1.4506975017238637, - "grad_norm": 6.796932220458984, - "learning_rate": 2.5821708304602276e-05, - "loss": 0.636, - "step": 164100 - }, - { - "epoch": 1.4507859049841758, - "grad_norm": 1.6616939306259155, - "learning_rate": 2.58202349169304e-05, - "loss": 0.5289, - "step": 164110 - }, - { - "epoch": 1.450874308244488, - "grad_norm": 3.142630100250244, - "learning_rate": 2.5818761529258533e-05, - "loss": 0.6268, - "step": 164120 - }, - { - "epoch": 1.4509627115048003, - "grad_norm": 1.2217828035354614, - "learning_rate": 2.5817288141586665e-05, - "loss": 0.6011, - "step": 164130 - }, - { - "epoch": 1.4510511147651126, - "grad_norm": 1.3970904350280762, - "learning_rate": 2.581581475391479e-05, - "loss": 0.5293, - "step": 164140 - }, - { - "epoch": 1.4511395180254247, - "grad_norm": 1.7399171590805054, - "learning_rate": 2.581434136624292e-05, - "loss": 0.5658, - "step": 164150 - }, - { - "epoch": 1.451227921285737, - "grad_norm": 2.3229031562805176, - "learning_rate": 2.5812867978571053e-05, - "loss": 0.599, - "step": 164160 - }, - { - "epoch": 1.4513163245460492, - "grad_norm": 7.8017258644104, - "learning_rate": 2.5811394590899178e-05, - "loss": 0.7117, - "step": 164170 - }, - { - "epoch": 1.4514047278063615, - "grad_norm": 6.35605525970459, - "learning_rate": 2.580992120322731e-05, - "loss": 0.7188, - "step": 164180 - }, - { - "epoch": 1.4514931310666737, - "grad_norm": 2.08288311958313, - "learning_rate": 2.5808447815555438e-05, - "loss": 0.5334, - "step": 164190 - }, - { - "epoch": 1.451581534326986, - "grad_norm": 15.069218635559082, - "learning_rate": 2.5806974427883566e-05, - "loss": 0.812, - "step": 164200 - }, - { - "epoch": 1.4516699375872983, - "grad_norm": 3.739450216293335, - "learning_rate": 2.5805501040211698e-05, - "loss": 0.6396, - "step": 164210 - }, - { - "epoch": 1.4517583408476105, - "grad_norm": 2.323925495147705, - "learning_rate": 2.5804027652539827e-05, - "loss": 0.6348, - "step": 164220 - }, - { - "epoch": 1.4518467441079226, - "grad_norm": 1.292046308517456, - "learning_rate": 2.5802554264867955e-05, - "loss": 0.6547, - "step": 164230 - }, - { - "epoch": 1.451935147368235, - "grad_norm": 6.096904754638672, - "learning_rate": 2.5801080877196087e-05, - "loss": 0.6436, - "step": 164240 - }, - { - "epoch": 1.4520235506285473, - "grad_norm": 5.194807529449463, - "learning_rate": 2.5799607489524215e-05, - "loss": 0.5368, - "step": 164250 - }, - { - "epoch": 1.4521119538888594, - "grad_norm": 1.9867806434631348, - "learning_rate": 2.5798134101852343e-05, - "loss": 0.5341, - "step": 164260 - }, - { - "epoch": 1.4522003571491717, - "grad_norm": 4.554685592651367, - "learning_rate": 2.5796660714180475e-05, - "loss": 0.5455, - "step": 164270 - }, - { - "epoch": 1.4522887604094838, - "grad_norm": 4.112213134765625, - "learning_rate": 2.5795187326508603e-05, - "loss": 0.5344, - "step": 164280 - }, - { - "epoch": 1.4523771636697962, - "grad_norm": 1.4324764013290405, - "learning_rate": 2.5793713938836732e-05, - "loss": 0.6195, - "step": 164290 - }, - { - "epoch": 1.4524655669301083, - "grad_norm": 1.9947705268859863, - "learning_rate": 2.579224055116486e-05, - "loss": 0.7024, - "step": 164300 - }, - { - "epoch": 1.4525539701904207, - "grad_norm": 2.0354230403900146, - "learning_rate": 2.5790767163492992e-05, - "loss": 0.6673, - "step": 164310 - }, - { - "epoch": 1.452642373450733, - "grad_norm": 1.0773022174835205, - "learning_rate": 2.578929377582112e-05, - "loss": 0.5314, - "step": 164320 - }, - { - "epoch": 1.4527307767110451, - "grad_norm": 11.21201229095459, - "learning_rate": 2.578782038814925e-05, - "loss": 0.5639, - "step": 164330 - }, - { - "epoch": 1.4528191799713572, - "grad_norm": 2.3989248275756836, - "learning_rate": 2.578634700047738e-05, - "loss": 0.6197, - "step": 164340 - }, - { - "epoch": 1.4529075832316696, - "grad_norm": 1.8855544328689575, - "learning_rate": 2.578487361280551e-05, - "loss": 0.6155, - "step": 164350 - }, - { - "epoch": 1.452995986491982, - "grad_norm": 1.1655921936035156, - "learning_rate": 2.5783400225133637e-05, - "loss": 0.5079, - "step": 164360 - }, - { - "epoch": 1.453084389752294, - "grad_norm": 4.279953479766846, - "learning_rate": 2.578192683746177e-05, - "loss": 0.6271, - "step": 164370 - }, - { - "epoch": 1.4531727930126064, - "grad_norm": 6.052027225494385, - "learning_rate": 2.5780453449789897e-05, - "loss": 0.6589, - "step": 164380 - }, - { - "epoch": 1.4532611962729185, - "grad_norm": 7.289895057678223, - "learning_rate": 2.5778980062118025e-05, - "loss": 0.6789, - "step": 164390 - }, - { - "epoch": 1.4533495995332308, - "grad_norm": 2.2724192142486572, - "learning_rate": 2.5777506674446157e-05, - "loss": 0.6214, - "step": 164400 - }, - { - "epoch": 1.453438002793543, - "grad_norm": 2.834459066390991, - "learning_rate": 2.5776033286774282e-05, - "loss": 0.5595, - "step": 164410 - }, - { - "epoch": 1.4535264060538553, - "grad_norm": 8.435432434082031, - "learning_rate": 2.5774559899102414e-05, - "loss": 0.7499, - "step": 164420 - }, - { - "epoch": 1.4536148093141676, - "grad_norm": 1.8576384782791138, - "learning_rate": 2.5773086511430546e-05, - "loss": 0.5031, - "step": 164430 - }, - { - "epoch": 1.4537032125744798, - "grad_norm": 3.54443621635437, - "learning_rate": 2.577161312375867e-05, - "loss": 0.7047, - "step": 164440 - }, - { - "epoch": 1.4537916158347919, - "grad_norm": 1.3779367208480835, - "learning_rate": 2.5770139736086802e-05, - "loss": 0.5273, - "step": 164450 - }, - { - "epoch": 1.4538800190951042, - "grad_norm": 0.951066792011261, - "learning_rate": 2.5768666348414934e-05, - "loss": 0.4368, - "step": 164460 - }, - { - "epoch": 1.4539684223554166, - "grad_norm": 6.272188663482666, - "learning_rate": 2.576719296074306e-05, - "loss": 0.6842, - "step": 164470 - }, - { - "epoch": 1.4540568256157287, - "grad_norm": 2.189135789871216, - "learning_rate": 2.576571957307119e-05, - "loss": 0.5512, - "step": 164480 - }, - { - "epoch": 1.454145228876041, - "grad_norm": 9.620318412780762, - "learning_rate": 2.5764246185399323e-05, - "loss": 0.5882, - "step": 164490 - }, - { - "epoch": 1.4542336321363531, - "grad_norm": 1.9308042526245117, - "learning_rate": 2.5762772797727448e-05, - "loss": 0.5067, - "step": 164500 - }, - { - "epoch": 1.4543220353966655, - "grad_norm": 2.0730581283569336, - "learning_rate": 2.576129941005558e-05, - "loss": 0.5153, - "step": 164510 - }, - { - "epoch": 1.4544104386569776, - "grad_norm": 1.6916236877441406, - "learning_rate": 2.5759826022383704e-05, - "loss": 0.6213, - "step": 164520 - }, - { - "epoch": 1.45449884191729, - "grad_norm": 5.143879413604736, - "learning_rate": 2.5758352634711836e-05, - "loss": 0.5907, - "step": 164530 - }, - { - "epoch": 1.454587245177602, - "grad_norm": 1.4263319969177246, - "learning_rate": 2.5756879247039968e-05, - "loss": 0.6434, - "step": 164540 - }, - { - "epoch": 1.4546756484379144, - "grad_norm": 32.212013244628906, - "learning_rate": 2.5755405859368093e-05, - "loss": 0.5096, - "step": 164550 - }, - { - "epoch": 1.4547640516982265, - "grad_norm": 1.8083791732788086, - "learning_rate": 2.5753932471696224e-05, - "loss": 0.4848, - "step": 164560 - }, - { - "epoch": 1.4548524549585389, - "grad_norm": 5.165712356567383, - "learning_rate": 2.5752459084024356e-05, - "loss": 0.6446, - "step": 164570 - }, - { - "epoch": 1.4549408582188512, - "grad_norm": 4.633554935455322, - "learning_rate": 2.575098569635248e-05, - "loss": 0.6794, - "step": 164580 - }, - { - "epoch": 1.4550292614791633, - "grad_norm": 9.161764144897461, - "learning_rate": 2.5749512308680613e-05, - "loss": 0.5444, - "step": 164590 - }, - { - "epoch": 1.4551176647394755, - "grad_norm": 3.4681951999664307, - "learning_rate": 2.5748038921008745e-05, - "loss": 0.5406, - "step": 164600 - }, - { - "epoch": 1.4552060679997878, - "grad_norm": 3.597238540649414, - "learning_rate": 2.574656553333687e-05, - "loss": 0.5073, - "step": 164610 - }, - { - "epoch": 1.4552944712601001, - "grad_norm": 3.0702738761901855, - "learning_rate": 2.5745092145665e-05, - "loss": 0.4819, - "step": 164620 - }, - { - "epoch": 1.4553828745204123, - "grad_norm": 3.031830310821533, - "learning_rate": 2.5743618757993133e-05, - "loss": 0.6888, - "step": 164630 - }, - { - "epoch": 1.4554712777807246, - "grad_norm": 3.0176455974578857, - "learning_rate": 2.5742145370321258e-05, - "loss": 0.6312, - "step": 164640 - }, - { - "epoch": 1.4555596810410367, - "grad_norm": 1.930683970451355, - "learning_rate": 2.574067198264939e-05, - "loss": 0.6988, - "step": 164650 - }, - { - "epoch": 1.455648084301349, - "grad_norm": 12.490296363830566, - "learning_rate": 2.5739198594977515e-05, - "loss": 0.623, - "step": 164660 - }, - { - "epoch": 1.4557364875616612, - "grad_norm": 2.5703468322753906, - "learning_rate": 2.5737725207305646e-05, - "loss": 0.6131, - "step": 164670 - }, - { - "epoch": 1.4558248908219735, - "grad_norm": 8.548884391784668, - "learning_rate": 2.5736251819633778e-05, - "loss": 0.5618, - "step": 164680 - }, - { - "epoch": 1.4559132940822859, - "grad_norm": 3.454601526260376, - "learning_rate": 2.5734778431961903e-05, - "loss": 0.6014, - "step": 164690 - }, - { - "epoch": 1.456001697342598, - "grad_norm": 2.2383291721343994, - "learning_rate": 2.5733305044290035e-05, - "loss": 0.5733, - "step": 164700 - }, - { - "epoch": 1.45609010060291, - "grad_norm": 4.370964527130127, - "learning_rate": 2.5731831656618167e-05, - "loss": 0.6482, - "step": 164710 - }, - { - "epoch": 1.4561785038632225, - "grad_norm": 8.252481460571289, - "learning_rate": 2.573035826894629e-05, - "loss": 0.646, - "step": 164720 - }, - { - "epoch": 1.4562669071235348, - "grad_norm": 1.504181146621704, - "learning_rate": 2.5728884881274423e-05, - "loss": 0.4821, - "step": 164730 - }, - { - "epoch": 1.456355310383847, - "grad_norm": 1.9877575635910034, - "learning_rate": 2.5727411493602555e-05, - "loss": 0.5806, - "step": 164740 - }, - { - "epoch": 1.4564437136441593, - "grad_norm": 1.5914674997329712, - "learning_rate": 2.572593810593068e-05, - "loss": 0.569, - "step": 164750 - }, - { - "epoch": 1.4565321169044714, - "grad_norm": 2.445352077484131, - "learning_rate": 2.5724464718258812e-05, - "loss": 0.5511, - "step": 164760 - }, - { - "epoch": 1.4566205201647837, - "grad_norm": 4.389750003814697, - "learning_rate": 2.5722991330586937e-05, - "loss": 0.6666, - "step": 164770 - }, - { - "epoch": 1.4567089234250958, - "grad_norm": 1.926706075668335, - "learning_rate": 2.572151794291507e-05, - "loss": 0.5022, - "step": 164780 - }, - { - "epoch": 1.4567973266854082, - "grad_norm": 5.218235015869141, - "learning_rate": 2.57200445552432e-05, - "loss": 0.6544, - "step": 164790 - }, - { - "epoch": 1.4568857299457205, - "grad_norm": 5.551985740661621, - "learning_rate": 2.5718571167571325e-05, - "loss": 0.6384, - "step": 164800 - }, - { - "epoch": 1.4569741332060326, - "grad_norm": 9.404993057250977, - "learning_rate": 2.5717097779899457e-05, - "loss": 0.7557, - "step": 164810 - }, - { - "epoch": 1.4570625364663448, - "grad_norm": 5.8141913414001465, - "learning_rate": 2.571562439222759e-05, - "loss": 0.5351, - "step": 164820 - }, - { - "epoch": 1.457150939726657, - "grad_norm": 3.627639055252075, - "learning_rate": 2.5714151004555714e-05, - "loss": 0.5949, - "step": 164830 - }, - { - "epoch": 1.4572393429869694, - "grad_norm": 12.041051864624023, - "learning_rate": 2.5712677616883845e-05, - "loss": 0.738, - "step": 164840 - }, - { - "epoch": 1.4573277462472816, - "grad_norm": 5.507813453674316, - "learning_rate": 2.5711204229211977e-05, - "loss": 0.5774, - "step": 164850 - }, - { - "epoch": 1.457416149507594, - "grad_norm": 6.31934118270874, - "learning_rate": 2.5709730841540102e-05, - "loss": 0.5821, - "step": 164860 - }, - { - "epoch": 1.457504552767906, - "grad_norm": 4.954954147338867, - "learning_rate": 2.5708257453868234e-05, - "loss": 0.6281, - "step": 164870 - }, - { - "epoch": 1.4575929560282184, - "grad_norm": 0.9911690950393677, - "learning_rate": 2.570678406619636e-05, - "loss": 0.4857, - "step": 164880 - }, - { - "epoch": 1.4576813592885305, - "grad_norm": 1.8980528116226196, - "learning_rate": 2.570531067852449e-05, - "loss": 0.5968, - "step": 164890 - }, - { - "epoch": 1.4577697625488428, - "grad_norm": 6.934421539306641, - "learning_rate": 2.5703837290852622e-05, - "loss": 0.5965, - "step": 164900 - }, - { - "epoch": 1.4578581658091552, - "grad_norm": 1.7270981073379517, - "learning_rate": 2.5702363903180747e-05, - "loss": 0.6563, - "step": 164910 - }, - { - "epoch": 1.4579465690694673, - "grad_norm": 1.2252440452575684, - "learning_rate": 2.570089051550888e-05, - "loss": 0.5351, - "step": 164920 - }, - { - "epoch": 1.4580349723297794, - "grad_norm": 7.718968868255615, - "learning_rate": 2.569941712783701e-05, - "loss": 0.5336, - "step": 164930 - }, - { - "epoch": 1.4581233755900918, - "grad_norm": 3.558626413345337, - "learning_rate": 2.5697943740165136e-05, - "loss": 0.6577, - "step": 164940 - }, - { - "epoch": 1.458211778850404, - "grad_norm": 2.1842081546783447, - "learning_rate": 2.5696470352493267e-05, - "loss": 0.6618, - "step": 164950 - }, - { - "epoch": 1.4583001821107162, - "grad_norm": 1.4713033437728882, - "learning_rate": 2.56949969648214e-05, - "loss": 0.5566, - "step": 164960 - }, - { - "epoch": 1.4583885853710286, - "grad_norm": 2.79213547706604, - "learning_rate": 2.5693523577149524e-05, - "loss": 0.7123, - "step": 164970 - }, - { - "epoch": 1.4584769886313407, - "grad_norm": 2.363684892654419, - "learning_rate": 2.5692050189477656e-05, - "loss": 0.7089, - "step": 164980 - }, - { - "epoch": 1.458565391891653, - "grad_norm": 3.16656231880188, - "learning_rate": 2.569057680180578e-05, - "loss": 0.653, - "step": 164990 - }, - { - "epoch": 1.4586537951519651, - "grad_norm": 1.1046537160873413, - "learning_rate": 2.5689103414133913e-05, - "loss": 0.5114, - "step": 165000 - }, - { - "epoch": 1.4587421984122775, - "grad_norm": 2.486225128173828, - "learning_rate": 2.5687630026462044e-05, - "loss": 0.5947, - "step": 165010 - }, - { - "epoch": 1.4588306016725898, - "grad_norm": 2.149235248565674, - "learning_rate": 2.568615663879017e-05, - "loss": 0.5024, - "step": 165020 - }, - { - "epoch": 1.458919004932902, - "grad_norm": 4.746137619018555, - "learning_rate": 2.56846832511183e-05, - "loss": 0.6324, - "step": 165030 - }, - { - "epoch": 1.459007408193214, - "grad_norm": 1.5279366970062256, - "learning_rate": 2.5683209863446433e-05, - "loss": 0.5474, - "step": 165040 - }, - { - "epoch": 1.4590958114535264, - "grad_norm": 9.404610633850098, - "learning_rate": 2.5681736475774558e-05, - "loss": 0.5092, - "step": 165050 - }, - { - "epoch": 1.4591842147138387, - "grad_norm": 1.1786949634552002, - "learning_rate": 2.568026308810269e-05, - "loss": 0.6351, - "step": 165060 - }, - { - "epoch": 1.4592726179741509, - "grad_norm": 3.8415687084198, - "learning_rate": 2.567878970043082e-05, - "loss": 0.5673, - "step": 165070 - }, - { - "epoch": 1.4593610212344632, - "grad_norm": 3.9637155532836914, - "learning_rate": 2.5677316312758946e-05, - "loss": 0.4905, - "step": 165080 - }, - { - "epoch": 1.4594494244947753, - "grad_norm": 1.7195208072662354, - "learning_rate": 2.5675842925087078e-05, - "loss": 0.6538, - "step": 165090 - }, - { - "epoch": 1.4595378277550877, - "grad_norm": 2.8560831546783447, - "learning_rate": 2.567436953741521e-05, - "loss": 0.6156, - "step": 165100 - }, - { - "epoch": 1.4596262310153998, - "grad_norm": 3.751354455947876, - "learning_rate": 2.5672896149743335e-05, - "loss": 0.6192, - "step": 165110 - }, - { - "epoch": 1.4597146342757121, - "grad_norm": 4.121771812438965, - "learning_rate": 2.5671422762071466e-05, - "loss": 0.5216, - "step": 165120 - }, - { - "epoch": 1.4598030375360243, - "grad_norm": 1.919204831123352, - "learning_rate": 2.5669949374399595e-05, - "loss": 0.558, - "step": 165130 - }, - { - "epoch": 1.4598914407963366, - "grad_norm": 2.993875741958618, - "learning_rate": 2.5668475986727723e-05, - "loss": 0.6175, - "step": 165140 - }, - { - "epoch": 1.4599798440566487, - "grad_norm": 2.063328742980957, - "learning_rate": 2.5667002599055855e-05, - "loss": 0.6634, - "step": 165150 - }, - { - "epoch": 1.460068247316961, - "grad_norm": 1.126875877380371, - "learning_rate": 2.5665529211383983e-05, - "loss": 0.6615, - "step": 165160 - }, - { - "epoch": 1.4601566505772734, - "grad_norm": 4.320643901824951, - "learning_rate": 2.566405582371211e-05, - "loss": 0.6034, - "step": 165170 - }, - { - "epoch": 1.4602450538375855, - "grad_norm": 2.498349905014038, - "learning_rate": 2.5662582436040243e-05, - "loss": 0.5868, - "step": 165180 - }, - { - "epoch": 1.4603334570978976, - "grad_norm": 1.547664999961853, - "learning_rate": 2.5661109048368372e-05, - "loss": 0.5582, - "step": 165190 - }, - { - "epoch": 1.46042186035821, - "grad_norm": 2.9772160053253174, - "learning_rate": 2.56596356606965e-05, - "loss": 0.6408, - "step": 165200 - }, - { - "epoch": 1.4605102636185223, - "grad_norm": 5.7648444175720215, - "learning_rate": 2.5658162273024632e-05, - "loss": 0.6789, - "step": 165210 - }, - { - "epoch": 1.4605986668788344, - "grad_norm": 2.8161935806274414, - "learning_rate": 2.565668888535276e-05, - "loss": 0.5534, - "step": 165220 - }, - { - "epoch": 1.4606870701391468, - "grad_norm": 0.6714740991592407, - "learning_rate": 2.565521549768089e-05, - "loss": 0.5508, - "step": 165230 - }, - { - "epoch": 1.460775473399459, - "grad_norm": 2.9895434379577637, - "learning_rate": 2.5653742110009017e-05, - "loss": 0.6576, - "step": 165240 - }, - { - "epoch": 1.4608638766597712, - "grad_norm": 3.760072708129883, - "learning_rate": 2.565226872233715e-05, - "loss": 0.5562, - "step": 165250 - }, - { - "epoch": 1.4609522799200834, - "grad_norm": 1.5829070806503296, - "learning_rate": 2.5650795334665277e-05, - "loss": 0.531, - "step": 165260 - }, - { - "epoch": 1.4610406831803957, - "grad_norm": 2.8461713790893555, - "learning_rate": 2.5649321946993405e-05, - "loss": 0.6239, - "step": 165270 - }, - { - "epoch": 1.461129086440708, - "grad_norm": 3.457423686981201, - "learning_rate": 2.5647848559321537e-05, - "loss": 0.5131, - "step": 165280 - }, - { - "epoch": 1.4612174897010202, - "grad_norm": 1.7850918769836426, - "learning_rate": 2.5646375171649665e-05, - "loss": 0.5816, - "step": 165290 - }, - { - "epoch": 1.4613058929613323, - "grad_norm": 3.1808416843414307, - "learning_rate": 2.5644901783977794e-05, - "loss": 0.6218, - "step": 165300 - }, - { - "epoch": 1.4613942962216446, - "grad_norm": 6.663628101348877, - "learning_rate": 2.5643428396305926e-05, - "loss": 0.7077, - "step": 165310 - }, - { - "epoch": 1.461482699481957, - "grad_norm": 3.2090237140655518, - "learning_rate": 2.5641955008634054e-05, - "loss": 0.5629, - "step": 165320 - }, - { - "epoch": 1.461571102742269, - "grad_norm": 1.8510327339172363, - "learning_rate": 2.5640481620962182e-05, - "loss": 0.5733, - "step": 165330 - }, - { - "epoch": 1.4616595060025814, - "grad_norm": 2.715728998184204, - "learning_rate": 2.5639008233290314e-05, - "loss": 0.5221, - "step": 165340 - }, - { - "epoch": 1.4617479092628936, - "grad_norm": 1.6793752908706665, - "learning_rate": 2.563753484561844e-05, - "loss": 0.6006, - "step": 165350 - }, - { - "epoch": 1.461836312523206, - "grad_norm": 1.9382816553115845, - "learning_rate": 2.563606145794657e-05, - "loss": 0.5766, - "step": 165360 - }, - { - "epoch": 1.461924715783518, - "grad_norm": 1.7276549339294434, - "learning_rate": 2.5634588070274702e-05, - "loss": 0.5043, - "step": 165370 - }, - { - "epoch": 1.4620131190438304, - "grad_norm": 3.428576946258545, - "learning_rate": 2.5633114682602827e-05, - "loss": 0.5291, - "step": 165380 - }, - { - "epoch": 1.4621015223041427, - "grad_norm": 1.6354753971099854, - "learning_rate": 2.563164129493096e-05, - "loss": 0.5526, - "step": 165390 - }, - { - "epoch": 1.4621899255644548, - "grad_norm": 2.0869579315185547, - "learning_rate": 2.563016790725909e-05, - "loss": 0.7208, - "step": 165400 - }, - { - "epoch": 1.462278328824767, - "grad_norm": 5.547192096710205, - "learning_rate": 2.5628694519587216e-05, - "loss": 0.5818, - "step": 165410 - }, - { - "epoch": 1.4623667320850793, - "grad_norm": 5.048770427703857, - "learning_rate": 2.5627221131915348e-05, - "loss": 0.6396, - "step": 165420 - }, - { - "epoch": 1.4624551353453916, - "grad_norm": 3.276597023010254, - "learning_rate": 2.562574774424348e-05, - "loss": 0.6493, - "step": 165430 - }, - { - "epoch": 1.4625435386057037, - "grad_norm": 4.226131916046143, - "learning_rate": 2.5624274356571604e-05, - "loss": 0.7202, - "step": 165440 - }, - { - "epoch": 1.462631941866016, - "grad_norm": 0.8856672048568726, - "learning_rate": 2.5622800968899736e-05, - "loss": 0.5749, - "step": 165450 - }, - { - "epoch": 1.4627203451263282, - "grad_norm": 1.2508918046951294, - "learning_rate": 2.562132758122786e-05, - "loss": 0.6631, - "step": 165460 - }, - { - "epoch": 1.4628087483866405, - "grad_norm": 3.146450996398926, - "learning_rate": 2.5619854193555993e-05, - "loss": 0.5841, - "step": 165470 - }, - { - "epoch": 1.4628971516469527, - "grad_norm": 2.5681655406951904, - "learning_rate": 2.5618380805884124e-05, - "loss": 0.6587, - "step": 165480 - }, - { - "epoch": 1.462985554907265, - "grad_norm": 5.823937892913818, - "learning_rate": 2.561690741821225e-05, - "loss": 0.5335, - "step": 165490 - }, - { - "epoch": 1.4630739581675773, - "grad_norm": 5.969976902008057, - "learning_rate": 2.561543403054038e-05, - "loss": 0.602, - "step": 165500 - }, - { - "epoch": 1.4631623614278895, - "grad_norm": 1.256791353225708, - "learning_rate": 2.5613960642868513e-05, - "loss": 0.5627, - "step": 165510 - }, - { - "epoch": 1.4632507646882016, - "grad_norm": 2.244436740875244, - "learning_rate": 2.5612487255196638e-05, - "loss": 0.6193, - "step": 165520 - }, - { - "epoch": 1.463339167948514, - "grad_norm": 5.083354949951172, - "learning_rate": 2.561101386752477e-05, - "loss": 0.5478, - "step": 165530 - }, - { - "epoch": 1.4634275712088263, - "grad_norm": 9.455780029296875, - "learning_rate": 2.56095404798529e-05, - "loss": 0.5386, - "step": 165540 - }, - { - "epoch": 1.4635159744691384, - "grad_norm": 2.285196304321289, - "learning_rate": 2.5608067092181026e-05, - "loss": 0.651, - "step": 165550 - }, - { - "epoch": 1.4636043777294507, - "grad_norm": 2.8001251220703125, - "learning_rate": 2.5606593704509158e-05, - "loss": 0.627, - "step": 165560 - }, - { - "epoch": 1.4636927809897629, - "grad_norm": 4.559179306030273, - "learning_rate": 2.560512031683729e-05, - "loss": 0.7675, - "step": 165570 - }, - { - "epoch": 1.4637811842500752, - "grad_norm": 3.3552405834198, - "learning_rate": 2.5603646929165415e-05, - "loss": 0.6884, - "step": 165580 - }, - { - "epoch": 1.4638695875103873, - "grad_norm": 6.944356918334961, - "learning_rate": 2.5602173541493547e-05, - "loss": 0.4882, - "step": 165590 - }, - { - "epoch": 1.4639579907706997, - "grad_norm": 1.5141106843948364, - "learning_rate": 2.560070015382167e-05, - "loss": 0.5562, - "step": 165600 - }, - { - "epoch": 1.464046394031012, - "grad_norm": 10.830549240112305, - "learning_rate": 2.5599226766149803e-05, - "loss": 0.6326, - "step": 165610 - }, - { - "epoch": 1.4641347972913241, - "grad_norm": 4.002711296081543, - "learning_rate": 2.5597753378477935e-05, - "loss": 0.6317, - "step": 165620 - }, - { - "epoch": 1.4642232005516362, - "grad_norm": 1.8983798027038574, - "learning_rate": 2.559627999080606e-05, - "loss": 0.6843, - "step": 165630 - }, - { - "epoch": 1.4643116038119486, - "grad_norm": 2.2025256156921387, - "learning_rate": 2.559480660313419e-05, - "loss": 0.6304, - "step": 165640 - }, - { - "epoch": 1.464400007072261, - "grad_norm": 2.338712453842163, - "learning_rate": 2.5593333215462323e-05, - "loss": 0.6261, - "step": 165650 - }, - { - "epoch": 1.464488410332573, - "grad_norm": 2.222493886947632, - "learning_rate": 2.559185982779045e-05, - "loss": 0.6161, - "step": 165660 - }, - { - "epoch": 1.4645768135928854, - "grad_norm": 2.706639051437378, - "learning_rate": 2.559038644011858e-05, - "loss": 0.6721, - "step": 165670 - }, - { - "epoch": 1.4646652168531975, - "grad_norm": 10.1292142868042, - "learning_rate": 2.5588913052446712e-05, - "loss": 0.5578, - "step": 165680 - }, - { - "epoch": 1.4647536201135098, - "grad_norm": 4.619641304016113, - "learning_rate": 2.5587439664774837e-05, - "loss": 0.5937, - "step": 165690 - }, - { - "epoch": 1.464842023373822, - "grad_norm": 3.47676420211792, - "learning_rate": 2.558596627710297e-05, - "loss": 0.6585, - "step": 165700 - }, - { - "epoch": 1.4649304266341343, - "grad_norm": 8.229005813598633, - "learning_rate": 2.5584492889431094e-05, - "loss": 0.5872, - "step": 165710 - }, - { - "epoch": 1.4650188298944464, - "grad_norm": 1.3553125858306885, - "learning_rate": 2.5583019501759225e-05, - "loss": 0.5232, - "step": 165720 - }, - { - "epoch": 1.4651072331547588, - "grad_norm": 0.8650517463684082, - "learning_rate": 2.5581546114087357e-05, - "loss": 0.5782, - "step": 165730 - }, - { - "epoch": 1.465195636415071, - "grad_norm": 0.8444440960884094, - "learning_rate": 2.5580072726415482e-05, - "loss": 0.6146, - "step": 165740 - }, - { - "epoch": 1.4652840396753832, - "grad_norm": 3.3970048427581787, - "learning_rate": 2.5578599338743614e-05, - "loss": 0.6311, - "step": 165750 - }, - { - "epoch": 1.4653724429356956, - "grad_norm": 2.781576633453369, - "learning_rate": 2.5577125951071745e-05, - "loss": 0.6764, - "step": 165760 - }, - { - "epoch": 1.4654608461960077, - "grad_norm": 1.3261603116989136, - "learning_rate": 2.557565256339987e-05, - "loss": 0.6909, - "step": 165770 - }, - { - "epoch": 1.4655492494563198, - "grad_norm": 1.5934619903564453, - "learning_rate": 2.5574179175728002e-05, - "loss": 0.6065, - "step": 165780 - }, - { - "epoch": 1.4656376527166322, - "grad_norm": 1.2074617147445679, - "learning_rate": 2.5572705788056134e-05, - "loss": 0.5175, - "step": 165790 - }, - { - "epoch": 1.4657260559769445, - "grad_norm": 0.938444197177887, - "learning_rate": 2.557123240038426e-05, - "loss": 0.565, - "step": 165800 - }, - { - "epoch": 1.4658144592372566, - "grad_norm": 1.1136524677276611, - "learning_rate": 2.556975901271239e-05, - "loss": 0.5749, - "step": 165810 - }, - { - "epoch": 1.465902862497569, - "grad_norm": 1.3726590871810913, - "learning_rate": 2.5568285625040516e-05, - "loss": 0.5493, - "step": 165820 - }, - { - "epoch": 1.465991265757881, - "grad_norm": 3.4139211177825928, - "learning_rate": 2.5566812237368647e-05, - "loss": 0.5185, - "step": 165830 - }, - { - "epoch": 1.4660796690181934, - "grad_norm": 2.2957217693328857, - "learning_rate": 2.556533884969678e-05, - "loss": 0.5733, - "step": 165840 - }, - { - "epoch": 1.4661680722785055, - "grad_norm": 3.5023183822631836, - "learning_rate": 2.5563865462024904e-05, - "loss": 0.6435, - "step": 165850 - }, - { - "epoch": 1.4662564755388179, - "grad_norm": 9.044766426086426, - "learning_rate": 2.5562392074353036e-05, - "loss": 0.551, - "step": 165860 - }, - { - "epoch": 1.4663448787991302, - "grad_norm": 1.5988744497299194, - "learning_rate": 2.5560918686681168e-05, - "loss": 0.5926, - "step": 165870 - }, - { - "epoch": 1.4664332820594423, - "grad_norm": 3.6065478324890137, - "learning_rate": 2.5559445299009293e-05, - "loss": 0.553, - "step": 165880 - }, - { - "epoch": 1.4665216853197545, - "grad_norm": 2.2264952659606934, - "learning_rate": 2.5557971911337424e-05, - "loss": 0.6058, - "step": 165890 - }, - { - "epoch": 1.4666100885800668, - "grad_norm": 1.6522276401519775, - "learning_rate": 2.5556498523665556e-05, - "loss": 0.5613, - "step": 165900 - }, - { - "epoch": 1.4666984918403791, - "grad_norm": 2.521946430206299, - "learning_rate": 2.555502513599368e-05, - "loss": 0.51, - "step": 165910 - }, - { - "epoch": 1.4667868951006913, - "grad_norm": 1.7782458066940308, - "learning_rate": 2.5553551748321813e-05, - "loss": 0.6272, - "step": 165920 - }, - { - "epoch": 1.4668752983610036, - "grad_norm": 2.4633171558380127, - "learning_rate": 2.5552078360649944e-05, - "loss": 0.6356, - "step": 165930 - }, - { - "epoch": 1.4669637016213157, - "grad_norm": 19.534011840820312, - "learning_rate": 2.555060497297807e-05, - "loss": 0.7104, - "step": 165940 - }, - { - "epoch": 1.467052104881628, - "grad_norm": 3.2198565006256104, - "learning_rate": 2.55491315853062e-05, - "loss": 0.6513, - "step": 165950 - }, - { - "epoch": 1.4671405081419402, - "grad_norm": 4.993585586547852, - "learning_rate": 2.5547658197634326e-05, - "loss": 0.6339, - "step": 165960 - }, - { - "epoch": 1.4672289114022525, - "grad_norm": 1.893470287322998, - "learning_rate": 2.5546184809962458e-05, - "loss": 0.5702, - "step": 165970 - }, - { - "epoch": 1.4673173146625649, - "grad_norm": 11.073661804199219, - "learning_rate": 2.554471142229059e-05, - "loss": 0.7347, - "step": 165980 - }, - { - "epoch": 1.467405717922877, - "grad_norm": 5.126662731170654, - "learning_rate": 2.5543238034618715e-05, - "loss": 0.5893, - "step": 165990 - }, - { - "epoch": 1.4674941211831891, - "grad_norm": 9.233426094055176, - "learning_rate": 2.5541764646946846e-05, - "loss": 0.6406, - "step": 166000 - }, - { - "epoch": 1.4675825244435015, - "grad_norm": 3.078634738922119, - "learning_rate": 2.5540291259274978e-05, - "loss": 0.5937, - "step": 166010 - }, - { - "epoch": 1.4676709277038138, - "grad_norm": 1.6421597003936768, - "learning_rate": 2.5538817871603103e-05, - "loss": 0.4789, - "step": 166020 - }, - { - "epoch": 1.467759330964126, - "grad_norm": 1.9199482202529907, - "learning_rate": 2.5537344483931235e-05, - "loss": 0.688, - "step": 166030 - }, - { - "epoch": 1.4678477342244383, - "grad_norm": 3.4600327014923096, - "learning_rate": 2.5535871096259366e-05, - "loss": 0.5997, - "step": 166040 - }, - { - "epoch": 1.4679361374847504, - "grad_norm": 10.555093765258789, - "learning_rate": 2.553439770858749e-05, - "loss": 0.6382, - "step": 166050 - }, - { - "epoch": 1.4680245407450627, - "grad_norm": 4.561395168304443, - "learning_rate": 2.5532924320915623e-05, - "loss": 0.5911, - "step": 166060 - }, - { - "epoch": 1.4681129440053748, - "grad_norm": 4.001600742340088, - "learning_rate": 2.553145093324375e-05, - "loss": 0.5947, - "step": 166070 - }, - { - "epoch": 1.4682013472656872, - "grad_norm": 1.355435848236084, - "learning_rate": 2.552997754557188e-05, - "loss": 0.5764, - "step": 166080 - }, - { - "epoch": 1.4682897505259995, - "grad_norm": 6.710354328155518, - "learning_rate": 2.552850415790001e-05, - "loss": 0.688, - "step": 166090 - }, - { - "epoch": 1.4683781537863116, - "grad_norm": 1.9244849681854248, - "learning_rate": 2.552703077022814e-05, - "loss": 0.6373, - "step": 166100 - }, - { - "epoch": 1.4684665570466238, - "grad_norm": 5.5034589767456055, - "learning_rate": 2.552555738255627e-05, - "loss": 0.6617, - "step": 166110 - }, - { - "epoch": 1.468554960306936, - "grad_norm": 1.4997817277908325, - "learning_rate": 2.55240839948844e-05, - "loss": 0.5782, - "step": 166120 - }, - { - "epoch": 1.4686433635672484, - "grad_norm": 2.0024592876434326, - "learning_rate": 2.552261060721253e-05, - "loss": 0.5451, - "step": 166130 - }, - { - "epoch": 1.4687317668275606, - "grad_norm": 2.064544677734375, - "learning_rate": 2.5521137219540657e-05, - "loss": 0.6011, - "step": 166140 - }, - { - "epoch": 1.468820170087873, - "grad_norm": 0.968387246131897, - "learning_rate": 2.551966383186879e-05, - "loss": 0.5261, - "step": 166150 - }, - { - "epoch": 1.468908573348185, - "grad_norm": 3.8145291805267334, - "learning_rate": 2.5518190444196917e-05, - "loss": 0.5823, - "step": 166160 - }, - { - "epoch": 1.4689969766084974, - "grad_norm": 1.5181794166564941, - "learning_rate": 2.5516717056525045e-05, - "loss": 0.6442, - "step": 166170 - }, - { - "epoch": 1.4690853798688095, - "grad_norm": 5.1150407791137695, - "learning_rate": 2.5515243668853174e-05, - "loss": 0.6484, - "step": 166180 - }, - { - "epoch": 1.4691737831291218, - "grad_norm": 5.140918254852295, - "learning_rate": 2.5513770281181305e-05, - "loss": 0.5803, - "step": 166190 - }, - { - "epoch": 1.4692621863894342, - "grad_norm": 4.786940574645996, - "learning_rate": 2.5512296893509434e-05, - "loss": 0.6295, - "step": 166200 - }, - { - "epoch": 1.4693505896497463, - "grad_norm": 5.206173896789551, - "learning_rate": 2.5510823505837562e-05, - "loss": 0.6806, - "step": 166210 - }, - { - "epoch": 1.4694389929100584, - "grad_norm": 12.629183769226074, - "learning_rate": 2.5509350118165694e-05, - "loss": 0.6354, - "step": 166220 - }, - { - "epoch": 1.4695273961703708, - "grad_norm": 1.6833518743515015, - "learning_rate": 2.5507876730493822e-05, - "loss": 0.4418, - "step": 166230 - }, - { - "epoch": 1.469615799430683, - "grad_norm": 5.06895637512207, - "learning_rate": 2.550640334282195e-05, - "loss": 0.6074, - "step": 166240 - }, - { - "epoch": 1.4697042026909952, - "grad_norm": 2.3329856395721436, - "learning_rate": 2.5504929955150082e-05, - "loss": 0.6471, - "step": 166250 - }, - { - "epoch": 1.4697926059513076, - "grad_norm": 2.237107753753662, - "learning_rate": 2.550345656747821e-05, - "loss": 0.4574, - "step": 166260 - }, - { - "epoch": 1.4698810092116197, - "grad_norm": 1.2353187799453735, - "learning_rate": 2.550198317980634e-05, - "loss": 0.5754, - "step": 166270 - }, - { - "epoch": 1.469969412471932, - "grad_norm": 3.021536350250244, - "learning_rate": 2.550050979213447e-05, - "loss": 0.6932, - "step": 166280 - }, - { - "epoch": 1.4700578157322441, - "grad_norm": 14.792726516723633, - "learning_rate": 2.5499036404462596e-05, - "loss": 0.6391, - "step": 166290 - }, - { - "epoch": 1.4701462189925565, - "grad_norm": 1.6500245332717896, - "learning_rate": 2.5497563016790727e-05, - "loss": 0.6702, - "step": 166300 - }, - { - "epoch": 1.4702346222528686, - "grad_norm": 1.6836272478103638, - "learning_rate": 2.549608962911886e-05, - "loss": 0.5673, - "step": 166310 - }, - { - "epoch": 1.470323025513181, - "grad_norm": 1.1950279474258423, - "learning_rate": 2.5494616241446984e-05, - "loss": 0.6478, - "step": 166320 - }, - { - "epoch": 1.470411428773493, - "grad_norm": 1.10524582862854, - "learning_rate": 2.5493142853775116e-05, - "loss": 0.5465, - "step": 166330 - }, - { - "epoch": 1.4704998320338054, - "grad_norm": 1.3094438314437866, - "learning_rate": 2.5491669466103248e-05, - "loss": 0.7214, - "step": 166340 - }, - { - "epoch": 1.4705882352941178, - "grad_norm": 1.426546335220337, - "learning_rate": 2.5490196078431373e-05, - "loss": 0.594, - "step": 166350 - }, - { - "epoch": 1.4706766385544299, - "grad_norm": 1.3956400156021118, - "learning_rate": 2.5488722690759504e-05, - "loss": 0.6851, - "step": 166360 - }, - { - "epoch": 1.470765041814742, - "grad_norm": 2.0667717456817627, - "learning_rate": 2.5487249303087636e-05, - "loss": 0.4455, - "step": 166370 - }, - { - "epoch": 1.4708534450750543, - "grad_norm": 2.319659948348999, - "learning_rate": 2.548577591541576e-05, - "loss": 0.6068, - "step": 166380 - }, - { - "epoch": 1.4709418483353667, - "grad_norm": 1.7326587438583374, - "learning_rate": 2.5484302527743893e-05, - "loss": 0.621, - "step": 166390 - }, - { - "epoch": 1.4710302515956788, - "grad_norm": 1.8600358963012695, - "learning_rate": 2.5482829140072025e-05, - "loss": 0.5259, - "step": 166400 - }, - { - "epoch": 1.4711186548559911, - "grad_norm": 2.192852020263672, - "learning_rate": 2.548135575240015e-05, - "loss": 0.6756, - "step": 166410 - }, - { - "epoch": 1.4712070581163033, - "grad_norm": 1.141935110092163, - "learning_rate": 2.547988236472828e-05, - "loss": 0.5778, - "step": 166420 - }, - { - "epoch": 1.4712954613766156, - "grad_norm": 5.724787712097168, - "learning_rate": 2.5478408977056406e-05, - "loss": 0.67, - "step": 166430 - }, - { - "epoch": 1.4713838646369277, - "grad_norm": 1.5245126485824585, - "learning_rate": 2.5476935589384538e-05, - "loss": 0.6764, - "step": 166440 - }, - { - "epoch": 1.47147226789724, - "grad_norm": 4.510645866394043, - "learning_rate": 2.547546220171267e-05, - "loss": 0.565, - "step": 166450 - }, - { - "epoch": 1.4715606711575524, - "grad_norm": 1.9344482421875, - "learning_rate": 2.5473988814040795e-05, - "loss": 0.6386, - "step": 166460 - }, - { - "epoch": 1.4716490744178645, - "grad_norm": 1.6704473495483398, - "learning_rate": 2.5472515426368926e-05, - "loss": 0.5401, - "step": 166470 - }, - { - "epoch": 1.4717374776781766, - "grad_norm": 3.4325690269470215, - "learning_rate": 2.5471042038697058e-05, - "loss": 0.7064, - "step": 166480 - }, - { - "epoch": 1.471825880938489, - "grad_norm": 2.9860715866088867, - "learning_rate": 2.5469568651025183e-05, - "loss": 0.6302, - "step": 166490 - }, - { - "epoch": 1.4719142841988013, - "grad_norm": 1.640015959739685, - "learning_rate": 2.5468095263353315e-05, - "loss": 0.5719, - "step": 166500 - }, - { - "epoch": 1.4720026874591134, - "grad_norm": 1.1518672704696655, - "learning_rate": 2.5466621875681447e-05, - "loss": 0.4811, - "step": 166510 - }, - { - "epoch": 1.4720910907194258, - "grad_norm": 2.573875904083252, - "learning_rate": 2.546514848800957e-05, - "loss": 0.6277, - "step": 166520 - }, - { - "epoch": 1.472179493979738, - "grad_norm": 3.281602144241333, - "learning_rate": 2.5463675100337703e-05, - "loss": 0.7033, - "step": 166530 - }, - { - "epoch": 1.4722678972400502, - "grad_norm": 3.3142848014831543, - "learning_rate": 2.5462201712665828e-05, - "loss": 0.5406, - "step": 166540 - }, - { - "epoch": 1.4723563005003624, - "grad_norm": 2.545706033706665, - "learning_rate": 2.546072832499396e-05, - "loss": 0.501, - "step": 166550 - }, - { - "epoch": 1.4724447037606747, - "grad_norm": 1.3589352369308472, - "learning_rate": 2.5459254937322092e-05, - "loss": 0.5504, - "step": 166560 - }, - { - "epoch": 1.472533107020987, - "grad_norm": 1.3167109489440918, - "learning_rate": 2.5457781549650217e-05, - "loss": 0.5171, - "step": 166570 - }, - { - "epoch": 1.4726215102812992, - "grad_norm": 1.404005765914917, - "learning_rate": 2.545630816197835e-05, - "loss": 0.3545, - "step": 166580 - }, - { - "epoch": 1.4727099135416113, - "grad_norm": 2.4481923580169678, - "learning_rate": 2.545483477430648e-05, - "loss": 0.6935, - "step": 166590 - }, - { - "epoch": 1.4727983168019236, - "grad_norm": 4.6668381690979, - "learning_rate": 2.5453361386634605e-05, - "loss": 0.5315, - "step": 166600 - }, - { - "epoch": 1.472886720062236, - "grad_norm": 1.568570852279663, - "learning_rate": 2.5451887998962737e-05, - "loss": 0.6689, - "step": 166610 - }, - { - "epoch": 1.472975123322548, - "grad_norm": 3.709521532058716, - "learning_rate": 2.545041461129087e-05, - "loss": 0.622, - "step": 166620 - }, - { - "epoch": 1.4730635265828604, - "grad_norm": 1.1138874292373657, - "learning_rate": 2.5448941223618994e-05, - "loss": 0.6033, - "step": 166630 - }, - { - "epoch": 1.4731519298431726, - "grad_norm": 1.392332673072815, - "learning_rate": 2.5447467835947125e-05, - "loss": 0.6351, - "step": 166640 - }, - { - "epoch": 1.473240333103485, - "grad_norm": 1.611612319946289, - "learning_rate": 2.544599444827525e-05, - "loss": 0.6065, - "step": 166650 - }, - { - "epoch": 1.473328736363797, - "grad_norm": 2.563417911529541, - "learning_rate": 2.5444521060603382e-05, - "loss": 0.7093, - "step": 166660 - }, - { - "epoch": 1.4734171396241094, - "grad_norm": 7.940690517425537, - "learning_rate": 2.5443047672931514e-05, - "loss": 0.6157, - "step": 166670 - }, - { - "epoch": 1.4735055428844217, - "grad_norm": 11.77758502960205, - "learning_rate": 2.544157428525964e-05, - "loss": 0.6166, - "step": 166680 - }, - { - "epoch": 1.4735939461447338, - "grad_norm": 2.736783981323242, - "learning_rate": 2.544010089758777e-05, - "loss": 0.5801, - "step": 166690 - }, - { - "epoch": 1.473682349405046, - "grad_norm": 2.061103582382202, - "learning_rate": 2.5438627509915902e-05, - "loss": 0.6122, - "step": 166700 - }, - { - "epoch": 1.4737707526653583, - "grad_norm": 4.539426803588867, - "learning_rate": 2.5437154122244027e-05, - "loss": 0.7098, - "step": 166710 - }, - { - "epoch": 1.4738591559256706, - "grad_norm": 12.13815689086914, - "learning_rate": 2.543568073457216e-05, - "loss": 0.5149, - "step": 166720 - }, - { - "epoch": 1.4739475591859827, - "grad_norm": 2.472132444381714, - "learning_rate": 2.543420734690029e-05, - "loss": 0.5887, - "step": 166730 - }, - { - "epoch": 1.474035962446295, - "grad_norm": 5.629611492156982, - "learning_rate": 2.5432733959228416e-05, - "loss": 0.5497, - "step": 166740 - }, - { - "epoch": 1.4741243657066072, - "grad_norm": 7.8986496925354, - "learning_rate": 2.5431260571556547e-05, - "loss": 0.6194, - "step": 166750 - }, - { - "epoch": 1.4742127689669196, - "grad_norm": 1.58394455909729, - "learning_rate": 2.5429787183884672e-05, - "loss": 0.62, - "step": 166760 - }, - { - "epoch": 1.4743011722272317, - "grad_norm": 2.47725772857666, - "learning_rate": 2.5428313796212804e-05, - "loss": 0.5576, - "step": 166770 - }, - { - "epoch": 1.474389575487544, - "grad_norm": 1.5111464262008667, - "learning_rate": 2.5426840408540936e-05, - "loss": 0.6828, - "step": 166780 - }, - { - "epoch": 1.4744779787478564, - "grad_norm": 29.438861846923828, - "learning_rate": 2.542536702086906e-05, - "loss": 0.487, - "step": 166790 - }, - { - "epoch": 1.4745663820081685, - "grad_norm": 2.4611611366271973, - "learning_rate": 2.5423893633197193e-05, - "loss": 0.624, - "step": 166800 - }, - { - "epoch": 1.4746547852684806, - "grad_norm": 1.5150115489959717, - "learning_rate": 2.5422420245525324e-05, - "loss": 0.5757, - "step": 166810 - }, - { - "epoch": 1.474743188528793, - "grad_norm": 6.518135070800781, - "learning_rate": 2.542094685785345e-05, - "loss": 0.6352, - "step": 166820 - }, - { - "epoch": 1.4748315917891053, - "grad_norm": 1.4559897184371948, - "learning_rate": 2.541947347018158e-05, - "loss": 0.6181, - "step": 166830 - }, - { - "epoch": 1.4749199950494174, - "grad_norm": 8.118577003479004, - "learning_rate": 2.5418000082509713e-05, - "loss": 0.5698, - "step": 166840 - }, - { - "epoch": 1.4750083983097297, - "grad_norm": 1.0413365364074707, - "learning_rate": 2.5416526694837838e-05, - "loss": 0.5416, - "step": 166850 - }, - { - "epoch": 1.4750968015700419, - "grad_norm": 1.469028353691101, - "learning_rate": 2.541505330716597e-05, - "loss": 0.5961, - "step": 166860 - }, - { - "epoch": 1.4751852048303542, - "grad_norm": 1.7227214574813843, - "learning_rate": 2.54135799194941e-05, - "loss": 0.6318, - "step": 166870 - }, - { - "epoch": 1.4752736080906663, - "grad_norm": 1.8608362674713135, - "learning_rate": 2.5412106531822226e-05, - "loss": 0.6335, - "step": 166880 - }, - { - "epoch": 1.4753620113509787, - "grad_norm": 2.001398801803589, - "learning_rate": 2.5410633144150358e-05, - "loss": 0.6015, - "step": 166890 - }, - { - "epoch": 1.4754504146112908, - "grad_norm": 2.1302404403686523, - "learning_rate": 2.5409159756478483e-05, - "loss": 0.6828, - "step": 166900 - }, - { - "epoch": 1.4755388178716031, - "grad_norm": 8.017047882080078, - "learning_rate": 2.5407686368806615e-05, - "loss": 0.5458, - "step": 166910 - }, - { - "epoch": 1.4756272211319152, - "grad_norm": 3.74044132232666, - "learning_rate": 2.5406212981134746e-05, - "loss": 0.6147, - "step": 166920 - }, - { - "epoch": 1.4757156243922276, - "grad_norm": 1.7153632640838623, - "learning_rate": 2.540473959346287e-05, - "loss": 0.5492, - "step": 166930 - }, - { - "epoch": 1.47580402765254, - "grad_norm": 1.6338903903961182, - "learning_rate": 2.5403266205791003e-05, - "loss": 0.6237, - "step": 166940 - }, - { - "epoch": 1.475892430912852, - "grad_norm": 2.5506057739257812, - "learning_rate": 2.5401792818119135e-05, - "loss": 0.7075, - "step": 166950 - }, - { - "epoch": 1.4759808341731642, - "grad_norm": 2.5842106342315674, - "learning_rate": 2.540031943044726e-05, - "loss": 0.5288, - "step": 166960 - }, - { - "epoch": 1.4760692374334765, - "grad_norm": 1.050004243850708, - "learning_rate": 2.539884604277539e-05, - "loss": 0.649, - "step": 166970 - }, - { - "epoch": 1.4761576406937889, - "grad_norm": 16.436973571777344, - "learning_rate": 2.5397372655103523e-05, - "loss": 0.4757, - "step": 166980 - }, - { - "epoch": 1.476246043954101, - "grad_norm": 1.9779506921768188, - "learning_rate": 2.5395899267431648e-05, - "loss": 0.6412, - "step": 166990 - }, - { - "epoch": 1.4763344472144133, - "grad_norm": 1.8107967376708984, - "learning_rate": 2.539442587975978e-05, - "loss": 0.5409, - "step": 167000 - }, - { - "epoch": 1.4764228504747254, - "grad_norm": 1.4559743404388428, - "learning_rate": 2.539295249208791e-05, - "loss": 0.6913, - "step": 167010 - }, - { - "epoch": 1.4765112537350378, - "grad_norm": 4.314670562744141, - "learning_rate": 2.5391479104416037e-05, - "loss": 0.605, - "step": 167020 - }, - { - "epoch": 1.47659965699535, - "grad_norm": 1.7426317930221558, - "learning_rate": 2.539000571674417e-05, - "loss": 0.5102, - "step": 167030 - }, - { - "epoch": 1.4766880602556622, - "grad_norm": 2.9428422451019287, - "learning_rate": 2.5388532329072297e-05, - "loss": 0.6702, - "step": 167040 - }, - { - "epoch": 1.4767764635159746, - "grad_norm": 1.1322271823883057, - "learning_rate": 2.5387058941400425e-05, - "loss": 0.6103, - "step": 167050 - }, - { - "epoch": 1.4768648667762867, - "grad_norm": 3.2456536293029785, - "learning_rate": 2.5385585553728557e-05, - "loss": 0.5782, - "step": 167060 - }, - { - "epoch": 1.4769532700365988, - "grad_norm": 2.7037160396575928, - "learning_rate": 2.5384112166056685e-05, - "loss": 0.5825, - "step": 167070 - }, - { - "epoch": 1.4770416732969112, - "grad_norm": 2.728236675262451, - "learning_rate": 2.5382638778384814e-05, - "loss": 0.5521, - "step": 167080 - }, - { - "epoch": 1.4771300765572235, - "grad_norm": 3.207359790802002, - "learning_rate": 2.5381165390712945e-05, - "loss": 0.5406, - "step": 167090 - }, - { - "epoch": 1.4772184798175356, - "grad_norm": 1.6934537887573242, - "learning_rate": 2.5379692003041074e-05, - "loss": 0.5483, - "step": 167100 - }, - { - "epoch": 1.477306883077848, - "grad_norm": 3.5528311729431152, - "learning_rate": 2.5378218615369202e-05, - "loss": 0.6066, - "step": 167110 - }, - { - "epoch": 1.47739528633816, - "grad_norm": 1.6753278970718384, - "learning_rate": 2.537674522769733e-05, - "loss": 0.5992, - "step": 167120 - }, - { - "epoch": 1.4774836895984724, - "grad_norm": 1.439624547958374, - "learning_rate": 2.5375271840025462e-05, - "loss": 0.5294, - "step": 167130 - }, - { - "epoch": 1.4775720928587845, - "grad_norm": 2.9812662601470947, - "learning_rate": 2.537379845235359e-05, - "loss": 0.6862, - "step": 167140 - }, - { - "epoch": 1.477660496119097, - "grad_norm": 2.172999143600464, - "learning_rate": 2.537232506468172e-05, - "loss": 0.6114, - "step": 167150 - }, - { - "epoch": 1.4777488993794092, - "grad_norm": 2.554595470428467, - "learning_rate": 2.537085167700985e-05, - "loss": 0.6167, - "step": 167160 - }, - { - "epoch": 1.4778373026397214, - "grad_norm": 4.447844982147217, - "learning_rate": 2.536937828933798e-05, - "loss": 0.58, - "step": 167170 - }, - { - "epoch": 1.4779257059000335, - "grad_norm": 1.9268163442611694, - "learning_rate": 2.5367904901666107e-05, - "loss": 0.6194, - "step": 167180 - }, - { - "epoch": 1.4780141091603458, - "grad_norm": 4.2392258644104, - "learning_rate": 2.536643151399424e-05, - "loss": 0.6735, - "step": 167190 - }, - { - "epoch": 1.4781025124206582, - "grad_norm": 2.439371109008789, - "learning_rate": 2.5364958126322367e-05, - "loss": 0.6448, - "step": 167200 - }, - { - "epoch": 1.4781909156809703, - "grad_norm": 1.1882117986679077, - "learning_rate": 2.5363484738650496e-05, - "loss": 0.4941, - "step": 167210 - }, - { - "epoch": 1.4782793189412826, - "grad_norm": 2.04378342628479, - "learning_rate": 2.5362011350978627e-05, - "loss": 0.4746, - "step": 167220 - }, - { - "epoch": 1.4783677222015947, - "grad_norm": 2.697740316390991, - "learning_rate": 2.5360537963306752e-05, - "loss": 0.764, - "step": 167230 - }, - { - "epoch": 1.478456125461907, - "grad_norm": 1.494955062866211, - "learning_rate": 2.5359064575634884e-05, - "loss": 0.6697, - "step": 167240 - }, - { - "epoch": 1.4785445287222192, - "grad_norm": 0.9937110543251038, - "learning_rate": 2.5357591187963016e-05, - "loss": 0.5706, - "step": 167250 - }, - { - "epoch": 1.4786329319825315, - "grad_norm": 1.5161702632904053, - "learning_rate": 2.535611780029114e-05, - "loss": 0.5916, - "step": 167260 - }, - { - "epoch": 1.4787213352428439, - "grad_norm": 2.774387836456299, - "learning_rate": 2.5354644412619273e-05, - "loss": 0.6247, - "step": 167270 - }, - { - "epoch": 1.478809738503156, - "grad_norm": 1.867197036743164, - "learning_rate": 2.5353171024947404e-05, - "loss": 0.6725, - "step": 167280 - }, - { - "epoch": 1.4788981417634681, - "grad_norm": 3.0254671573638916, - "learning_rate": 2.535169763727553e-05, - "loss": 0.5347, - "step": 167290 - }, - { - "epoch": 1.4789865450237805, - "grad_norm": 6.657210350036621, - "learning_rate": 2.535022424960366e-05, - "loss": 0.6858, - "step": 167300 - }, - { - "epoch": 1.4790749482840928, - "grad_norm": 5.8452558517456055, - "learning_rate": 2.5348750861931793e-05, - "loss": 0.5615, - "step": 167310 - }, - { - "epoch": 1.479163351544405, - "grad_norm": 8.66779899597168, - "learning_rate": 2.5347277474259918e-05, - "loss": 0.7571, - "step": 167320 - }, - { - "epoch": 1.4792517548047173, - "grad_norm": 2.419351577758789, - "learning_rate": 2.534580408658805e-05, - "loss": 0.6538, - "step": 167330 - }, - { - "epoch": 1.4793401580650294, - "grad_norm": 2.094560384750366, - "learning_rate": 2.534433069891618e-05, - "loss": 0.6737, - "step": 167340 - }, - { - "epoch": 1.4794285613253417, - "grad_norm": 4.466786861419678, - "learning_rate": 2.5342857311244306e-05, - "loss": 0.4474, - "step": 167350 - }, - { - "epoch": 1.4795169645856538, - "grad_norm": 2.4063034057617188, - "learning_rate": 2.5341383923572438e-05, - "loss": 0.5744, - "step": 167360 - }, - { - "epoch": 1.4796053678459662, - "grad_norm": 17.483577728271484, - "learning_rate": 2.5339910535900563e-05, - "loss": 0.5744, - "step": 167370 - }, - { - "epoch": 1.4796937711062785, - "grad_norm": 3.8088626861572266, - "learning_rate": 2.5338437148228695e-05, - "loss": 0.5904, - "step": 167380 - }, - { - "epoch": 1.4797821743665907, - "grad_norm": 9.049639701843262, - "learning_rate": 2.5336963760556826e-05, - "loss": 0.492, - "step": 167390 - }, - { - "epoch": 1.4798705776269028, - "grad_norm": 3.6838390827178955, - "learning_rate": 2.533549037288495e-05, - "loss": 0.5937, - "step": 167400 - }, - { - "epoch": 1.4799589808872151, - "grad_norm": 4.280373573303223, - "learning_rate": 2.5334016985213083e-05, - "loss": 0.5342, - "step": 167410 - }, - { - "epoch": 1.4800473841475275, - "grad_norm": 3.391746759414673, - "learning_rate": 2.5332543597541215e-05, - "loss": 0.6239, - "step": 167420 - }, - { - "epoch": 1.4801357874078396, - "grad_norm": 3.2820661067962646, - "learning_rate": 2.533107020986934e-05, - "loss": 0.4712, - "step": 167430 - }, - { - "epoch": 1.480224190668152, - "grad_norm": 3.8523433208465576, - "learning_rate": 2.532959682219747e-05, - "loss": 0.6637, - "step": 167440 - }, - { - "epoch": 1.480312593928464, - "grad_norm": 1.9412356615066528, - "learning_rate": 2.5328123434525603e-05, - "loss": 0.5292, - "step": 167450 - }, - { - "epoch": 1.4804009971887764, - "grad_norm": 2.5025839805603027, - "learning_rate": 2.5326650046853728e-05, - "loss": 0.5537, - "step": 167460 - }, - { - "epoch": 1.4804894004490885, - "grad_norm": 8.144207000732422, - "learning_rate": 2.532517665918186e-05, - "loss": 0.6397, - "step": 167470 - }, - { - "epoch": 1.4805778037094008, - "grad_norm": 5.0364580154418945, - "learning_rate": 2.5323703271509985e-05, - "loss": 0.6693, - "step": 167480 - }, - { - "epoch": 1.480666206969713, - "grad_norm": 3.9354279041290283, - "learning_rate": 2.5322229883838117e-05, - "loss": 0.6508, - "step": 167490 - }, - { - "epoch": 1.4807546102300253, - "grad_norm": 1.4934314489364624, - "learning_rate": 2.532075649616625e-05, - "loss": 0.5671, - "step": 167500 - }, - { - "epoch": 1.4808430134903374, - "grad_norm": 1.1495667695999146, - "learning_rate": 2.5319283108494373e-05, - "loss": 0.6857, - "step": 167510 - }, - { - "epoch": 1.4809314167506498, - "grad_norm": 2.913308620452881, - "learning_rate": 2.5317809720822505e-05, - "loss": 0.6824, - "step": 167520 - }, - { - "epoch": 1.481019820010962, - "grad_norm": 2.445190906524658, - "learning_rate": 2.5316336333150637e-05, - "loss": 0.5857, - "step": 167530 - }, - { - "epoch": 1.4811082232712742, - "grad_norm": 1.6554746627807617, - "learning_rate": 2.5314862945478762e-05, - "loss": 0.7662, - "step": 167540 - }, - { - "epoch": 1.4811966265315863, - "grad_norm": 1.173087477684021, - "learning_rate": 2.5313389557806894e-05, - "loss": 0.6402, - "step": 167550 - }, - { - "epoch": 1.4812850297918987, - "grad_norm": 1.0074834823608398, - "learning_rate": 2.5311916170135025e-05, - "loss": 0.5489, - "step": 167560 - }, - { - "epoch": 1.481373433052211, - "grad_norm": 3.9076647758483887, - "learning_rate": 2.531044278246315e-05, - "loss": 0.5338, - "step": 167570 - }, - { - "epoch": 1.4814618363125232, - "grad_norm": 3.726008415222168, - "learning_rate": 2.5308969394791282e-05, - "loss": 0.6305, - "step": 167580 - }, - { - "epoch": 1.4815502395728355, - "grad_norm": 2.1018142700195312, - "learning_rate": 2.5307496007119407e-05, - "loss": 0.5941, - "step": 167590 - }, - { - "epoch": 1.4816386428331476, - "grad_norm": 1.6468955278396606, - "learning_rate": 2.530602261944754e-05, - "loss": 0.5982, - "step": 167600 - }, - { - "epoch": 1.48172704609346, - "grad_norm": 7.451429843902588, - "learning_rate": 2.530454923177567e-05, - "loss": 0.5749, - "step": 167610 - }, - { - "epoch": 1.481815449353772, - "grad_norm": 2.754551887512207, - "learning_rate": 2.5303075844103796e-05, - "loss": 0.7129, - "step": 167620 - }, - { - "epoch": 1.4819038526140844, - "grad_norm": 4.118371486663818, - "learning_rate": 2.5301602456431927e-05, - "loss": 0.6339, - "step": 167630 - }, - { - "epoch": 1.4819922558743968, - "grad_norm": 2.545444965362549, - "learning_rate": 2.530012906876006e-05, - "loss": 0.5962, - "step": 167640 - }, - { - "epoch": 1.4820806591347089, - "grad_norm": 5.016569137573242, - "learning_rate": 2.5298655681088184e-05, - "loss": 0.4963, - "step": 167650 - }, - { - "epoch": 1.482169062395021, - "grad_norm": 2.780261278152466, - "learning_rate": 2.5297182293416316e-05, - "loss": 0.5706, - "step": 167660 - }, - { - "epoch": 1.4822574656553333, - "grad_norm": 5.90531587600708, - "learning_rate": 2.5295708905744447e-05, - "loss": 0.6377, - "step": 167670 - }, - { - "epoch": 1.4823458689156457, - "grad_norm": 17.049043655395508, - "learning_rate": 2.5294235518072572e-05, - "loss": 0.6117, - "step": 167680 - }, - { - "epoch": 1.4824342721759578, - "grad_norm": 1.530900478363037, - "learning_rate": 2.5292762130400704e-05, - "loss": 0.5135, - "step": 167690 - }, - { - "epoch": 1.4825226754362701, - "grad_norm": 4.286895275115967, - "learning_rate": 2.529128874272883e-05, - "loss": 0.5757, - "step": 167700 - }, - { - "epoch": 1.4826110786965823, - "grad_norm": 4.696872234344482, - "learning_rate": 2.528981535505696e-05, - "loss": 0.5842, - "step": 167710 - }, - { - "epoch": 1.4826994819568946, - "grad_norm": 2.500370502471924, - "learning_rate": 2.5288341967385093e-05, - "loss": 0.5894, - "step": 167720 - }, - { - "epoch": 1.4827878852172067, - "grad_norm": 8.937431335449219, - "learning_rate": 2.5286868579713218e-05, - "loss": 0.5779, - "step": 167730 - }, - { - "epoch": 1.482876288477519, - "grad_norm": 1.4781537055969238, - "learning_rate": 2.528539519204135e-05, - "loss": 0.6223, - "step": 167740 - }, - { - "epoch": 1.4829646917378314, - "grad_norm": 3.4986331462860107, - "learning_rate": 2.528392180436948e-05, - "loss": 0.5833, - "step": 167750 - }, - { - "epoch": 1.4830530949981435, - "grad_norm": 2.8727478981018066, - "learning_rate": 2.5282448416697606e-05, - "loss": 0.711, - "step": 167760 - }, - { - "epoch": 1.4831414982584556, - "grad_norm": 1.9648290872573853, - "learning_rate": 2.5280975029025738e-05, - "loss": 0.558, - "step": 167770 - }, - { - "epoch": 1.483229901518768, - "grad_norm": 2.376296281814575, - "learning_rate": 2.527950164135387e-05, - "loss": 0.5542, - "step": 167780 - }, - { - "epoch": 1.4833183047790803, - "grad_norm": 14.182267189025879, - "learning_rate": 2.5278028253681994e-05, - "loss": 0.6262, - "step": 167790 - }, - { - "epoch": 1.4834067080393925, - "grad_norm": 1.7571372985839844, - "learning_rate": 2.5276554866010126e-05, - "loss": 0.3986, - "step": 167800 - }, - { - "epoch": 1.4834951112997048, - "grad_norm": 2.912757158279419, - "learning_rate": 2.5275081478338258e-05, - "loss": 0.5501, - "step": 167810 - }, - { - "epoch": 1.483583514560017, - "grad_norm": 2.827099323272705, - "learning_rate": 2.5273608090666383e-05, - "loss": 0.7058, - "step": 167820 - }, - { - "epoch": 1.4836719178203293, - "grad_norm": 4.497413158416748, - "learning_rate": 2.5272134702994515e-05, - "loss": 0.6198, - "step": 167830 - }, - { - "epoch": 1.4837603210806414, - "grad_norm": 1.271637201309204, - "learning_rate": 2.527066131532264e-05, - "loss": 0.4485, - "step": 167840 - }, - { - "epoch": 1.4838487243409537, - "grad_norm": 1.8315919637680054, - "learning_rate": 2.526918792765077e-05, - "loss": 0.5869, - "step": 167850 - }, - { - "epoch": 1.483937127601266, - "grad_norm": 1.2556116580963135, - "learning_rate": 2.5267714539978903e-05, - "loss": 0.5199, - "step": 167860 - }, - { - "epoch": 1.4840255308615782, - "grad_norm": 2.636373996734619, - "learning_rate": 2.5266241152307028e-05, - "loss": 0.7162, - "step": 167870 - }, - { - "epoch": 1.4841139341218903, - "grad_norm": 2.2732982635498047, - "learning_rate": 2.526476776463516e-05, - "loss": 0.6404, - "step": 167880 - }, - { - "epoch": 1.4842023373822026, - "grad_norm": 13.094659805297852, - "learning_rate": 2.526329437696329e-05, - "loss": 0.6638, - "step": 167890 - }, - { - "epoch": 1.484290740642515, - "grad_norm": 10.085988998413086, - "learning_rate": 2.5261820989291417e-05, - "loss": 0.723, - "step": 167900 - }, - { - "epoch": 1.484379143902827, - "grad_norm": 1.838112473487854, - "learning_rate": 2.5260347601619548e-05, - "loss": 0.6037, - "step": 167910 - }, - { - "epoch": 1.4844675471631394, - "grad_norm": 2.0667428970336914, - "learning_rate": 2.525887421394768e-05, - "loss": 0.6192, - "step": 167920 - }, - { - "epoch": 1.4845559504234516, - "grad_norm": 1.7449721097946167, - "learning_rate": 2.5257400826275805e-05, - "loss": 0.4779, - "step": 167930 - }, - { - "epoch": 1.484644353683764, - "grad_norm": 3.658342123031616, - "learning_rate": 2.5255927438603937e-05, - "loss": 0.5418, - "step": 167940 - }, - { - "epoch": 1.484732756944076, - "grad_norm": 5.174700736999512, - "learning_rate": 2.5254454050932065e-05, - "loss": 0.505, - "step": 167950 - }, - { - "epoch": 1.4848211602043884, - "grad_norm": 2.033924102783203, - "learning_rate": 2.5252980663260193e-05, - "loss": 0.6025, - "step": 167960 - }, - { - "epoch": 1.4849095634647007, - "grad_norm": 2.3899896144866943, - "learning_rate": 2.5251507275588325e-05, - "loss": 0.5726, - "step": 167970 - }, - { - "epoch": 1.4849979667250128, - "grad_norm": 1.7553874254226685, - "learning_rate": 2.5250033887916454e-05, - "loss": 0.6192, - "step": 167980 - }, - { - "epoch": 1.485086369985325, - "grad_norm": 2.7527503967285156, - "learning_rate": 2.5248560500244582e-05, - "loss": 0.6492, - "step": 167990 - }, - { - "epoch": 1.4851747732456373, - "grad_norm": 4.926758766174316, - "learning_rate": 2.5247087112572714e-05, - "loss": 0.6981, - "step": 168000 - }, - { - "epoch": 1.4852631765059496, - "grad_norm": 5.197306156158447, - "learning_rate": 2.5245613724900842e-05, - "loss": 0.5863, - "step": 168010 - }, - { - "epoch": 1.4853515797662618, - "grad_norm": 10.739989280700684, - "learning_rate": 2.524414033722897e-05, - "loss": 0.5708, - "step": 168020 - }, - { - "epoch": 1.485439983026574, - "grad_norm": 1.9307574033737183, - "learning_rate": 2.5242666949557102e-05, - "loss": 0.5454, - "step": 168030 - }, - { - "epoch": 1.4855283862868862, - "grad_norm": 3.721250534057617, - "learning_rate": 2.524119356188523e-05, - "loss": 0.5725, - "step": 168040 - }, - { - "epoch": 1.4856167895471986, - "grad_norm": 4.485631465911865, - "learning_rate": 2.523972017421336e-05, - "loss": 0.6236, - "step": 168050 - }, - { - "epoch": 1.4857051928075107, - "grad_norm": 3.744614839553833, - "learning_rate": 2.5238246786541487e-05, - "loss": 0.6876, - "step": 168060 - }, - { - "epoch": 1.485793596067823, - "grad_norm": 2.1492726802825928, - "learning_rate": 2.523677339886962e-05, - "loss": 0.6738, - "step": 168070 - }, - { - "epoch": 1.4858819993281351, - "grad_norm": 2.7124409675598145, - "learning_rate": 2.5235300011197747e-05, - "loss": 0.4988, - "step": 168080 - }, - { - "epoch": 1.4859704025884475, - "grad_norm": 10.714515686035156, - "learning_rate": 2.5233826623525876e-05, - "loss": 0.5874, - "step": 168090 - }, - { - "epoch": 1.4860588058487596, - "grad_norm": 8.479536056518555, - "learning_rate": 2.5232353235854007e-05, - "loss": 0.5909, - "step": 168100 - }, - { - "epoch": 1.486147209109072, - "grad_norm": 14.649287223815918, - "learning_rate": 2.5230879848182136e-05, - "loss": 0.7773, - "step": 168110 - }, - { - "epoch": 1.4862356123693843, - "grad_norm": 6.432976245880127, - "learning_rate": 2.5229406460510264e-05, - "loss": 0.5694, - "step": 168120 - }, - { - "epoch": 1.4863240156296964, - "grad_norm": 3.3449676036834717, - "learning_rate": 2.5227933072838396e-05, - "loss": 0.6942, - "step": 168130 - }, - { - "epoch": 1.4864124188900087, - "grad_norm": 7.80836820602417, - "learning_rate": 2.5226459685166524e-05, - "loss": 0.563, - "step": 168140 - }, - { - "epoch": 1.4865008221503209, - "grad_norm": 1.676095962524414, - "learning_rate": 2.5224986297494652e-05, - "loss": 0.6054, - "step": 168150 - }, - { - "epoch": 1.4865892254106332, - "grad_norm": 3.417198419570923, - "learning_rate": 2.5223512909822784e-05, - "loss": 0.5569, - "step": 168160 - }, - { - "epoch": 1.4866776286709453, - "grad_norm": 1.5840697288513184, - "learning_rate": 2.522203952215091e-05, - "loss": 0.4732, - "step": 168170 - }, - { - "epoch": 1.4867660319312577, - "grad_norm": 3.6179959774017334, - "learning_rate": 2.522056613447904e-05, - "loss": 0.5414, - "step": 168180 - }, - { - "epoch": 1.4868544351915698, - "grad_norm": 4.9565043449401855, - "learning_rate": 2.5219092746807173e-05, - "loss": 0.6726, - "step": 168190 - }, - { - "epoch": 1.4869428384518821, - "grad_norm": 2.658311605453491, - "learning_rate": 2.5217619359135298e-05, - "loss": 0.6698, - "step": 168200 - }, - { - "epoch": 1.4870312417121943, - "grad_norm": 2.3924074172973633, - "learning_rate": 2.521614597146343e-05, - "loss": 0.5888, - "step": 168210 - }, - { - "epoch": 1.4871196449725066, - "grad_norm": 2.092282295227051, - "learning_rate": 2.521467258379156e-05, - "loss": 0.6933, - "step": 168220 - }, - { - "epoch": 1.487208048232819, - "grad_norm": 5.658856391906738, - "learning_rate": 2.5213199196119686e-05, - "loss": 0.5825, - "step": 168230 - }, - { - "epoch": 1.487296451493131, - "grad_norm": 1.1307692527770996, - "learning_rate": 2.5211725808447818e-05, - "loss": 0.6151, - "step": 168240 - }, - { - "epoch": 1.4873848547534432, - "grad_norm": 2.23170804977417, - "learning_rate": 2.521025242077595e-05, - "loss": 0.7011, - "step": 168250 - }, - { - "epoch": 1.4874732580137555, - "grad_norm": 1.663549780845642, - "learning_rate": 2.5208779033104075e-05, - "loss": 0.5642, - "step": 168260 - }, - { - "epoch": 1.4875616612740679, - "grad_norm": 3.4748644828796387, - "learning_rate": 2.5207305645432206e-05, - "loss": 0.5497, - "step": 168270 - }, - { - "epoch": 1.48765006453438, - "grad_norm": 1.6814273595809937, - "learning_rate": 2.5205832257760338e-05, - "loss": 0.5978, - "step": 168280 - }, - { - "epoch": 1.4877384677946923, - "grad_norm": 1.6505483388900757, - "learning_rate": 2.5204358870088463e-05, - "loss": 0.559, - "step": 168290 - }, - { - "epoch": 1.4878268710550044, - "grad_norm": 1.7569222450256348, - "learning_rate": 2.5202885482416595e-05, - "loss": 0.6306, - "step": 168300 - }, - { - "epoch": 1.4879152743153168, - "grad_norm": 1.5775505304336548, - "learning_rate": 2.520141209474472e-05, - "loss": 0.6264, - "step": 168310 - }, - { - "epoch": 1.488003677575629, - "grad_norm": 1.5059866905212402, - "learning_rate": 2.519993870707285e-05, - "loss": 0.5655, - "step": 168320 - }, - { - "epoch": 1.4880920808359412, - "grad_norm": 7.040750980377197, - "learning_rate": 2.5198465319400983e-05, - "loss": 0.5223, - "step": 168330 - }, - { - "epoch": 1.4881804840962536, - "grad_norm": 1.3542442321777344, - "learning_rate": 2.5196991931729108e-05, - "loss": 0.6205, - "step": 168340 - }, - { - "epoch": 1.4882688873565657, - "grad_norm": 8.73717975616455, - "learning_rate": 2.519551854405724e-05, - "loss": 0.4427, - "step": 168350 - }, - { - "epoch": 1.4883572906168778, - "grad_norm": 1.6536052227020264, - "learning_rate": 2.519404515638537e-05, - "loss": 0.6414, - "step": 168360 - }, - { - "epoch": 1.4884456938771902, - "grad_norm": 26.23335075378418, - "learning_rate": 2.5192571768713497e-05, - "loss": 0.4855, - "step": 168370 - }, - { - "epoch": 1.4885340971375025, - "grad_norm": 5.047027111053467, - "learning_rate": 2.519109838104163e-05, - "loss": 0.5632, - "step": 168380 - }, - { - "epoch": 1.4886225003978146, - "grad_norm": 1.5244139432907104, - "learning_rate": 2.518962499336976e-05, - "loss": 0.6747, - "step": 168390 - }, - { - "epoch": 1.488710903658127, - "grad_norm": 2.4863431453704834, - "learning_rate": 2.5188151605697885e-05, - "loss": 0.6225, - "step": 168400 - }, - { - "epoch": 1.488799306918439, - "grad_norm": 2.471019744873047, - "learning_rate": 2.5186678218026017e-05, - "loss": 0.5615, - "step": 168410 - }, - { - "epoch": 1.4888877101787514, - "grad_norm": 1.9932076930999756, - "learning_rate": 2.5185204830354142e-05, - "loss": 0.5905, - "step": 168420 - }, - { - "epoch": 1.4889761134390636, - "grad_norm": 1.5231190919876099, - "learning_rate": 2.5183731442682273e-05, - "loss": 0.6244, - "step": 168430 - }, - { - "epoch": 1.489064516699376, - "grad_norm": 1.7419872283935547, - "learning_rate": 2.5182258055010405e-05, - "loss": 0.6957, - "step": 168440 - }, - { - "epoch": 1.4891529199596882, - "grad_norm": 8.614608764648438, - "learning_rate": 2.518078466733853e-05, - "loss": 0.675, - "step": 168450 - }, - { - "epoch": 1.4892413232200004, - "grad_norm": 4.871392250061035, - "learning_rate": 2.5179311279666662e-05, - "loss": 0.537, - "step": 168460 - }, - { - "epoch": 1.4893297264803125, - "grad_norm": 3.458824634552002, - "learning_rate": 2.5177837891994794e-05, - "loss": 0.612, - "step": 168470 - }, - { - "epoch": 1.4894181297406248, - "grad_norm": 1.302211880683899, - "learning_rate": 2.517636450432292e-05, - "loss": 0.558, - "step": 168480 - }, - { - "epoch": 1.4895065330009372, - "grad_norm": 10.931535720825195, - "learning_rate": 2.517489111665105e-05, - "loss": 0.6323, - "step": 168490 - }, - { - "epoch": 1.4895949362612493, - "grad_norm": 5.303699493408203, - "learning_rate": 2.5173417728979182e-05, - "loss": 0.6159, - "step": 168500 - }, - { - "epoch": 1.4896833395215616, - "grad_norm": 2.347606658935547, - "learning_rate": 2.5171944341307307e-05, - "loss": 0.559, - "step": 168510 - }, - { - "epoch": 1.4897717427818737, - "grad_norm": 2.460277795791626, - "learning_rate": 2.517047095363544e-05, - "loss": 0.648, - "step": 168520 - }, - { - "epoch": 1.489860146042186, - "grad_norm": 2.0660288333892822, - "learning_rate": 2.5168997565963564e-05, - "loss": 0.5716, - "step": 168530 - }, - { - "epoch": 1.4899485493024982, - "grad_norm": 0.9978792667388916, - "learning_rate": 2.5167524178291696e-05, - "loss": 0.6552, - "step": 168540 - }, - { - "epoch": 1.4900369525628105, - "grad_norm": 4.9887003898620605, - "learning_rate": 2.5166050790619827e-05, - "loss": 0.579, - "step": 168550 - }, - { - "epoch": 1.4901253558231229, - "grad_norm": 1.4201104640960693, - "learning_rate": 2.5164577402947952e-05, - "loss": 0.5044, - "step": 168560 - }, - { - "epoch": 1.490213759083435, - "grad_norm": 3.3996100425720215, - "learning_rate": 2.5163104015276084e-05, - "loss": 0.6232, - "step": 168570 - }, - { - "epoch": 1.4903021623437471, - "grad_norm": 15.804523468017578, - "learning_rate": 2.5161630627604216e-05, - "loss": 0.5801, - "step": 168580 - }, - { - "epoch": 1.4903905656040595, - "grad_norm": 1.1996616125106812, - "learning_rate": 2.516015723993234e-05, - "loss": 0.5963, - "step": 168590 - }, - { - "epoch": 1.4904789688643718, - "grad_norm": 20.32427215576172, - "learning_rate": 2.5158683852260472e-05, - "loss": 0.5672, - "step": 168600 - }, - { - "epoch": 1.490567372124684, - "grad_norm": 7.99282693862915, - "learning_rate": 2.5157210464588604e-05, - "loss": 0.6394, - "step": 168610 - }, - { - "epoch": 1.4906557753849963, - "grad_norm": 16.958051681518555, - "learning_rate": 2.515573707691673e-05, - "loss": 0.5896, - "step": 168620 - }, - { - "epoch": 1.4907441786453084, - "grad_norm": 8.355259895324707, - "learning_rate": 2.515426368924486e-05, - "loss": 0.5606, - "step": 168630 - }, - { - "epoch": 1.4908325819056207, - "grad_norm": 3.4368736743927, - "learning_rate": 2.5152790301572993e-05, - "loss": 0.5371, - "step": 168640 - }, - { - "epoch": 1.4909209851659329, - "grad_norm": 1.612131953239441, - "learning_rate": 2.5151316913901118e-05, - "loss": 0.6349, - "step": 168650 - }, - { - "epoch": 1.4910093884262452, - "grad_norm": 4.0380048751831055, - "learning_rate": 2.514984352622925e-05, - "loss": 0.547, - "step": 168660 - }, - { - "epoch": 1.4910977916865575, - "grad_norm": 6.6345696449279785, - "learning_rate": 2.5148370138557374e-05, - "loss": 0.5238, - "step": 168670 - }, - { - "epoch": 1.4911861949468697, - "grad_norm": 3.4301276206970215, - "learning_rate": 2.5146896750885506e-05, - "loss": 0.6353, - "step": 168680 - }, - { - "epoch": 1.4912745982071818, - "grad_norm": 6.49104642868042, - "learning_rate": 2.5145423363213638e-05, - "loss": 0.6937, - "step": 168690 - }, - { - "epoch": 1.4913630014674941, - "grad_norm": 4.643334865570068, - "learning_rate": 2.5143949975541763e-05, - "loss": 0.5247, - "step": 168700 - }, - { - "epoch": 1.4914514047278065, - "grad_norm": 3.2362964153289795, - "learning_rate": 2.5142476587869895e-05, - "loss": 0.5551, - "step": 168710 - }, - { - "epoch": 1.4915398079881186, - "grad_norm": 5.963395595550537, - "learning_rate": 2.5141003200198026e-05, - "loss": 0.6472, - "step": 168720 - }, - { - "epoch": 1.491628211248431, - "grad_norm": 4.386768341064453, - "learning_rate": 2.513952981252615e-05, - "loss": 0.6609, - "step": 168730 - }, - { - "epoch": 1.491716614508743, - "grad_norm": 8.722784042358398, - "learning_rate": 2.5138056424854283e-05, - "loss": 0.8431, - "step": 168740 - }, - { - "epoch": 1.4918050177690554, - "grad_norm": 2.4228360652923584, - "learning_rate": 2.5136583037182415e-05, - "loss": 0.6162, - "step": 168750 - }, - { - "epoch": 1.4918934210293675, - "grad_norm": 0.815682590007782, - "learning_rate": 2.513510964951054e-05, - "loss": 0.5852, - "step": 168760 - }, - { - "epoch": 1.4919818242896798, - "grad_norm": 3.2544634342193604, - "learning_rate": 2.513363626183867e-05, - "loss": 0.6008, - "step": 168770 - }, - { - "epoch": 1.492070227549992, - "grad_norm": 1.5924484729766846, - "learning_rate": 2.5132162874166796e-05, - "loss": 0.5001, - "step": 168780 - }, - { - "epoch": 1.4921586308103043, - "grad_norm": 2.2395777702331543, - "learning_rate": 2.5130689486494928e-05, - "loss": 0.7412, - "step": 168790 - }, - { - "epoch": 1.4922470340706164, - "grad_norm": 3.179886817932129, - "learning_rate": 2.512921609882306e-05, - "loss": 0.6234, - "step": 168800 - }, - { - "epoch": 1.4923354373309288, - "grad_norm": 3.3182594776153564, - "learning_rate": 2.5127742711151185e-05, - "loss": 0.6121, - "step": 168810 - }, - { - "epoch": 1.4924238405912411, - "grad_norm": 1.2474032640457153, - "learning_rate": 2.5126269323479317e-05, - "loss": 0.552, - "step": 168820 - }, - { - "epoch": 1.4925122438515532, - "grad_norm": 3.5903759002685547, - "learning_rate": 2.5124795935807448e-05, - "loss": 0.6559, - "step": 168830 - }, - { - "epoch": 1.4926006471118654, - "grad_norm": 2.073939800262451, - "learning_rate": 2.5123322548135573e-05, - "loss": 0.5857, - "step": 168840 - }, - { - "epoch": 1.4926890503721777, - "grad_norm": 2.900148391723633, - "learning_rate": 2.5121849160463705e-05, - "loss": 0.587, - "step": 168850 - }, - { - "epoch": 1.49277745363249, - "grad_norm": 2.4644055366516113, - "learning_rate": 2.5120375772791837e-05, - "loss": 0.5949, - "step": 168860 - }, - { - "epoch": 1.4928658568928022, - "grad_norm": 1.1597604751586914, - "learning_rate": 2.5118902385119962e-05, - "loss": 0.6415, - "step": 168870 - }, - { - "epoch": 1.4929542601531145, - "grad_norm": 2.5917768478393555, - "learning_rate": 2.5117428997448093e-05, - "loss": 0.6046, - "step": 168880 - }, - { - "epoch": 1.4930426634134266, - "grad_norm": 2.9160399436950684, - "learning_rate": 2.5115955609776222e-05, - "loss": 0.5587, - "step": 168890 - }, - { - "epoch": 1.493131066673739, - "grad_norm": 1.3189928531646729, - "learning_rate": 2.5114482222104354e-05, - "loss": 0.5735, - "step": 168900 - }, - { - "epoch": 1.493219469934051, - "grad_norm": 1.201468825340271, - "learning_rate": 2.5113008834432482e-05, - "loss": 0.7179, - "step": 168910 - }, - { - "epoch": 1.4933078731943634, - "grad_norm": 3.8716065883636475, - "learning_rate": 2.511153544676061e-05, - "loss": 0.5129, - "step": 168920 - }, - { - "epoch": 1.4933962764546758, - "grad_norm": 4.122172832489014, - "learning_rate": 2.5110062059088742e-05, - "loss": 0.6779, - "step": 168930 - }, - { - "epoch": 1.4934846797149879, - "grad_norm": 3.85265851020813, - "learning_rate": 2.510858867141687e-05, - "loss": 0.7558, - "step": 168940 - }, - { - "epoch": 1.4935730829753, - "grad_norm": 6.949263095855713, - "learning_rate": 2.5107115283745e-05, - "loss": 0.5533, - "step": 168950 - }, - { - "epoch": 1.4936614862356123, - "grad_norm": 2.57426118850708, - "learning_rate": 2.510564189607313e-05, - "loss": 0.824, - "step": 168960 - }, - { - "epoch": 1.4937498894959247, - "grad_norm": 4.959727764129639, - "learning_rate": 2.510416850840126e-05, - "loss": 0.8049, - "step": 168970 - }, - { - "epoch": 1.4938382927562368, - "grad_norm": 1.691426157951355, - "learning_rate": 2.5102695120729387e-05, - "loss": 0.4823, - "step": 168980 - }, - { - "epoch": 1.4939266960165491, - "grad_norm": 3.8828234672546387, - "learning_rate": 2.510122173305752e-05, - "loss": 0.6448, - "step": 168990 - }, - { - "epoch": 1.4940150992768613, - "grad_norm": 14.066272735595703, - "learning_rate": 2.5099748345385644e-05, - "loss": 0.5908, - "step": 169000 - }, - { - "epoch": 1.4941035025371736, - "grad_norm": 1.3125174045562744, - "learning_rate": 2.5098274957713776e-05, - "loss": 0.6485, - "step": 169010 - }, - { - "epoch": 1.4941919057974857, - "grad_norm": 1.0090336799621582, - "learning_rate": 2.5096801570041907e-05, - "loss": 0.5247, - "step": 169020 - }, - { - "epoch": 1.494280309057798, - "grad_norm": 5.59339714050293, - "learning_rate": 2.5095328182370032e-05, - "loss": 0.4493, - "step": 169030 - }, - { - "epoch": 1.4943687123181104, - "grad_norm": 1.753861427307129, - "learning_rate": 2.5093854794698164e-05, - "loss": 0.687, - "step": 169040 - }, - { - "epoch": 1.4944571155784225, - "grad_norm": 1.7943350076675415, - "learning_rate": 2.5092381407026296e-05, - "loss": 0.5861, - "step": 169050 - }, - { - "epoch": 1.4945455188387347, - "grad_norm": 9.86168384552002, - "learning_rate": 2.509090801935442e-05, - "loss": 0.5855, - "step": 169060 - }, - { - "epoch": 1.494633922099047, - "grad_norm": 2.938364028930664, - "learning_rate": 2.5089434631682553e-05, - "loss": 0.6469, - "step": 169070 - }, - { - "epoch": 1.4947223253593593, - "grad_norm": 0.9118348956108093, - "learning_rate": 2.5087961244010684e-05, - "loss": 0.5248, - "step": 169080 - }, - { - "epoch": 1.4948107286196715, - "grad_norm": 3.6555280685424805, - "learning_rate": 2.508648785633881e-05, - "loss": 0.5285, - "step": 169090 - }, - { - "epoch": 1.4948991318799838, - "grad_norm": 2.906902313232422, - "learning_rate": 2.508501446866694e-05, - "loss": 0.6011, - "step": 169100 - }, - { - "epoch": 1.494987535140296, - "grad_norm": 1.880859136581421, - "learning_rate": 2.5083541080995073e-05, - "loss": 0.5405, - "step": 169110 - }, - { - "epoch": 1.4950759384006083, - "grad_norm": 1.6957145929336548, - "learning_rate": 2.5082067693323198e-05, - "loss": 0.5583, - "step": 169120 - }, - { - "epoch": 1.4951643416609204, - "grad_norm": 5.842836380004883, - "learning_rate": 2.508059430565133e-05, - "loss": 0.6368, - "step": 169130 - }, - { - "epoch": 1.4952527449212327, - "grad_norm": 7.523136138916016, - "learning_rate": 2.5079120917979454e-05, - "loss": 0.4566, - "step": 169140 - }, - { - "epoch": 1.495341148181545, - "grad_norm": 1.844063639640808, - "learning_rate": 2.5077647530307586e-05, - "loss": 0.7034, - "step": 169150 - }, - { - "epoch": 1.4954295514418572, - "grad_norm": 1.8485033512115479, - "learning_rate": 2.5076174142635718e-05, - "loss": 0.6989, - "step": 169160 - }, - { - "epoch": 1.4955179547021693, - "grad_norm": 1.600848913192749, - "learning_rate": 2.5074700754963843e-05, - "loss": 0.5078, - "step": 169170 - }, - { - "epoch": 1.4956063579624816, - "grad_norm": 2.792823314666748, - "learning_rate": 2.5073227367291975e-05, - "loss": 0.6132, - "step": 169180 - }, - { - "epoch": 1.495694761222794, - "grad_norm": 1.3458143472671509, - "learning_rate": 2.5071753979620106e-05, - "loss": 0.5546, - "step": 169190 - }, - { - "epoch": 1.495783164483106, - "grad_norm": 7.214616298675537, - "learning_rate": 2.507028059194823e-05, - "loss": 0.6309, - "step": 169200 - }, - { - "epoch": 1.4958715677434185, - "grad_norm": 4.552582740783691, - "learning_rate": 2.5068807204276363e-05, - "loss": 0.6711, - "step": 169210 - }, - { - "epoch": 1.4959599710037306, - "grad_norm": 1.661666750907898, - "learning_rate": 2.5067333816604495e-05, - "loss": 0.5191, - "step": 169220 - }, - { - "epoch": 1.496048374264043, - "grad_norm": 3.7032909393310547, - "learning_rate": 2.506586042893262e-05, - "loss": 0.7553, - "step": 169230 - }, - { - "epoch": 1.496136777524355, - "grad_norm": 5.18577766418457, - "learning_rate": 2.506438704126075e-05, - "loss": 0.5995, - "step": 169240 - }, - { - "epoch": 1.4962251807846674, - "grad_norm": 2.2874503135681152, - "learning_rate": 2.5062913653588876e-05, - "loss": 0.7453, - "step": 169250 - }, - { - "epoch": 1.4963135840449797, - "grad_norm": 6.964410305023193, - "learning_rate": 2.5061440265917008e-05, - "loss": 0.7524, - "step": 169260 - }, - { - "epoch": 1.4964019873052918, - "grad_norm": 3.547203540802002, - "learning_rate": 2.505996687824514e-05, - "loss": 0.5764, - "step": 169270 - }, - { - "epoch": 1.496490390565604, - "grad_norm": 1.2275030612945557, - "learning_rate": 2.5058493490573265e-05, - "loss": 0.4425, - "step": 169280 - }, - { - "epoch": 1.4965787938259163, - "grad_norm": 2.9658262729644775, - "learning_rate": 2.5057020102901397e-05, - "loss": 0.6042, - "step": 169290 - }, - { - "epoch": 1.4966671970862286, - "grad_norm": 5.516174793243408, - "learning_rate": 2.505554671522953e-05, - "loss": 0.4284, - "step": 169300 - }, - { - "epoch": 1.4967556003465408, - "grad_norm": 3.4635496139526367, - "learning_rate": 2.5054073327557653e-05, - "loss": 0.5782, - "step": 169310 - }, - { - "epoch": 1.496844003606853, - "grad_norm": 5.4861016273498535, - "learning_rate": 2.5052599939885785e-05, - "loss": 0.4534, - "step": 169320 - }, - { - "epoch": 1.4969324068671652, - "grad_norm": 1.2953311204910278, - "learning_rate": 2.5051126552213917e-05, - "loss": 0.59, - "step": 169330 - }, - { - "epoch": 1.4970208101274776, - "grad_norm": 14.16625690460205, - "learning_rate": 2.5049653164542042e-05, - "loss": 0.5066, - "step": 169340 - }, - { - "epoch": 1.4971092133877897, - "grad_norm": 2.735600233078003, - "learning_rate": 2.5048179776870174e-05, - "loss": 0.6294, - "step": 169350 - }, - { - "epoch": 1.497197616648102, - "grad_norm": 2.928574562072754, - "learning_rate": 2.50467063891983e-05, - "loss": 0.653, - "step": 169360 - }, - { - "epoch": 1.4972860199084141, - "grad_norm": 0.7642167210578918, - "learning_rate": 2.504523300152643e-05, - "loss": 0.5845, - "step": 169370 - }, - { - "epoch": 1.4973744231687265, - "grad_norm": 1.8937228918075562, - "learning_rate": 2.5043759613854562e-05, - "loss": 0.5038, - "step": 169380 - }, - { - "epoch": 1.4974628264290386, - "grad_norm": 1.8398478031158447, - "learning_rate": 2.5042286226182687e-05, - "loss": 0.6733, - "step": 169390 - }, - { - "epoch": 1.497551229689351, - "grad_norm": 2.191654682159424, - "learning_rate": 2.504081283851082e-05, - "loss": 0.6384, - "step": 169400 - }, - { - "epoch": 1.4976396329496633, - "grad_norm": 2.1948232650756836, - "learning_rate": 2.503933945083895e-05, - "loss": 0.5465, - "step": 169410 - }, - { - "epoch": 1.4977280362099754, - "grad_norm": 1.8171820640563965, - "learning_rate": 2.5037866063167075e-05, - "loss": 0.4783, - "step": 169420 - }, - { - "epoch": 1.4978164394702875, - "grad_norm": 4.705183506011963, - "learning_rate": 2.5036392675495207e-05, - "loss": 0.478, - "step": 169430 - }, - { - "epoch": 1.4979048427305999, - "grad_norm": 1.259000539779663, - "learning_rate": 2.503491928782334e-05, - "loss": 0.5277, - "step": 169440 - }, - { - "epoch": 1.4979932459909122, - "grad_norm": 4.042740345001221, - "learning_rate": 2.5033445900151464e-05, - "loss": 0.499, - "step": 169450 - }, - { - "epoch": 1.4980816492512243, - "grad_norm": 1.422348976135254, - "learning_rate": 2.5031972512479596e-05, - "loss": 0.5433, - "step": 169460 - }, - { - "epoch": 1.4981700525115367, - "grad_norm": 1.154599905014038, - "learning_rate": 2.503049912480772e-05, - "loss": 0.4343, - "step": 169470 - }, - { - "epoch": 1.4982584557718488, - "grad_norm": 3.9661824703216553, - "learning_rate": 2.5029025737135852e-05, - "loss": 0.6969, - "step": 169480 - }, - { - "epoch": 1.4983468590321611, - "grad_norm": 3.3000917434692383, - "learning_rate": 2.5027552349463984e-05, - "loss": 0.6381, - "step": 169490 - }, - { - "epoch": 1.4984352622924733, - "grad_norm": 1.0850319862365723, - "learning_rate": 2.502607896179211e-05, - "loss": 0.5702, - "step": 169500 - }, - { - "epoch": 1.4985236655527856, - "grad_norm": 6.473759174346924, - "learning_rate": 2.502460557412024e-05, - "loss": 0.6195, - "step": 169510 - }, - { - "epoch": 1.498612068813098, - "grad_norm": 1.8104168176651, - "learning_rate": 2.5023132186448372e-05, - "loss": 0.6375, - "step": 169520 - }, - { - "epoch": 1.49870047207341, - "grad_norm": 1.222034215927124, - "learning_rate": 2.5021658798776497e-05, - "loss": 0.6913, - "step": 169530 - }, - { - "epoch": 1.4987888753337222, - "grad_norm": 2.7687134742736816, - "learning_rate": 2.502018541110463e-05, - "loss": 0.6479, - "step": 169540 - }, - { - "epoch": 1.4988772785940345, - "grad_norm": 1.4761496782302856, - "learning_rate": 2.501871202343276e-05, - "loss": 0.4683, - "step": 169550 - }, - { - "epoch": 1.4989656818543469, - "grad_norm": 3.6298413276672363, - "learning_rate": 2.5017238635760886e-05, - "loss": 0.64, - "step": 169560 - }, - { - "epoch": 1.499054085114659, - "grad_norm": 7.204841136932373, - "learning_rate": 2.5015765248089018e-05, - "loss": 0.5428, - "step": 169570 - }, - { - "epoch": 1.4991424883749713, - "grad_norm": 0.9959343075752258, - "learning_rate": 2.501429186041715e-05, - "loss": 0.4581, - "step": 169580 - }, - { - "epoch": 1.4992308916352834, - "grad_norm": 2.387869358062744, - "learning_rate": 2.5012818472745274e-05, - "loss": 0.6253, - "step": 169590 - }, - { - "epoch": 1.4993192948955958, - "grad_norm": 1.51453697681427, - "learning_rate": 2.5011345085073406e-05, - "loss": 0.6093, - "step": 169600 - }, - { - "epoch": 1.499407698155908, - "grad_norm": 4.41741943359375, - "learning_rate": 2.500987169740153e-05, - "loss": 0.6319, - "step": 169610 - }, - { - "epoch": 1.4994961014162203, - "grad_norm": 2.2445061206817627, - "learning_rate": 2.5008398309729663e-05, - "loss": 0.6416, - "step": 169620 - }, - { - "epoch": 1.4995845046765326, - "grad_norm": 1.8241304159164429, - "learning_rate": 2.5006924922057795e-05, - "loss": 0.6524, - "step": 169630 - }, - { - "epoch": 1.4996729079368447, - "grad_norm": 2.927823543548584, - "learning_rate": 2.500545153438592e-05, - "loss": 0.5658, - "step": 169640 - }, - { - "epoch": 1.4997613111971568, - "grad_norm": 2.05664324760437, - "learning_rate": 2.500397814671405e-05, - "loss": 0.6525, - "step": 169650 - }, - { - "epoch": 1.4998497144574692, - "grad_norm": 6.470807075500488, - "learning_rate": 2.5002504759042183e-05, - "loss": 0.6434, - "step": 169660 - }, - { - "epoch": 1.4999381177177815, - "grad_norm": 2.6049599647521973, - "learning_rate": 2.5001031371370308e-05, - "loss": 0.5496, - "step": 169670 - }, - { - "epoch": 1.5000265209780936, - "grad_norm": 2.938055992126465, - "learning_rate": 2.499955798369844e-05, - "loss": 0.6717, - "step": 169680 - }, - { - "epoch": 1.5001149242384058, - "grad_norm": 1.9531197547912598, - "learning_rate": 2.4998084596026568e-05, - "loss": 0.5321, - "step": 169690 - }, - { - "epoch": 1.500203327498718, - "grad_norm": 2.913414478302002, - "learning_rate": 2.4996611208354696e-05, - "loss": 0.6191, - "step": 169700 - }, - { - "epoch": 1.5002917307590304, - "grad_norm": 5.947141170501709, - "learning_rate": 2.4995137820682828e-05, - "loss": 0.615, - "step": 169710 - }, - { - "epoch": 1.5003801340193426, - "grad_norm": 2.870274782180786, - "learning_rate": 2.4993664433010957e-05, - "loss": 0.6628, - "step": 169720 - }, - { - "epoch": 1.500468537279655, - "grad_norm": 2.479779005050659, - "learning_rate": 2.4992191045339085e-05, - "loss": 0.6681, - "step": 169730 - }, - { - "epoch": 1.5005569405399672, - "grad_norm": 2.949855089187622, - "learning_rate": 2.4990717657667213e-05, - "loss": 0.6598, - "step": 169740 - }, - { - "epoch": 1.5006453438002794, - "grad_norm": 2.6002488136291504, - "learning_rate": 2.4989244269995345e-05, - "loss": 0.5747, - "step": 169750 - }, - { - "epoch": 1.5007337470605915, - "grad_norm": 3.154245376586914, - "learning_rate": 2.4987770882323473e-05, - "loss": 0.6492, - "step": 169760 - }, - { - "epoch": 1.5008221503209038, - "grad_norm": 2.0533437728881836, - "learning_rate": 2.49862974946516e-05, - "loss": 0.5728, - "step": 169770 - }, - { - "epoch": 1.5009105535812162, - "grad_norm": 3.662990093231201, - "learning_rate": 2.4984824106979733e-05, - "loss": 0.5604, - "step": 169780 - }, - { - "epoch": 1.5009989568415283, - "grad_norm": 1.4692844152450562, - "learning_rate": 2.4983350719307862e-05, - "loss": 0.5606, - "step": 169790 - }, - { - "epoch": 1.5010873601018404, - "grad_norm": 1.2336918115615845, - "learning_rate": 2.498187733163599e-05, - "loss": 0.5632, - "step": 169800 - }, - { - "epoch": 1.5011757633621527, - "grad_norm": 10.304373741149902, - "learning_rate": 2.4980403943964122e-05, - "loss": 0.6256, - "step": 169810 - }, - { - "epoch": 1.501264166622465, - "grad_norm": 1.5401891469955444, - "learning_rate": 2.497893055629225e-05, - "loss": 0.5284, - "step": 169820 - }, - { - "epoch": 1.5013525698827772, - "grad_norm": 2.591830253601074, - "learning_rate": 2.497745716862038e-05, - "loss": 0.5906, - "step": 169830 - }, - { - "epoch": 1.5014409731430896, - "grad_norm": 2.6223156452178955, - "learning_rate": 2.497598378094851e-05, - "loss": 0.4506, - "step": 169840 - }, - { - "epoch": 1.501529376403402, - "grad_norm": 1.4776949882507324, - "learning_rate": 2.497451039327664e-05, - "loss": 0.6062, - "step": 169850 - }, - { - "epoch": 1.501617779663714, - "grad_norm": 1.1779335737228394, - "learning_rate": 2.4973037005604767e-05, - "loss": 0.4831, - "step": 169860 - }, - { - "epoch": 1.5017061829240261, - "grad_norm": 2.3911023139953613, - "learning_rate": 2.49715636179329e-05, - "loss": 0.6537, - "step": 169870 - }, - { - "epoch": 1.5017945861843385, - "grad_norm": 0.955909252166748, - "learning_rate": 2.4970090230261027e-05, - "loss": 0.4837, - "step": 169880 - }, - { - "epoch": 1.5018829894446508, - "grad_norm": 2.4581687450408936, - "learning_rate": 2.4968616842589155e-05, - "loss": 0.5608, - "step": 169890 - }, - { - "epoch": 1.501971392704963, - "grad_norm": 13.110949516296387, - "learning_rate": 2.4967143454917287e-05, - "loss": 0.6386, - "step": 169900 - }, - { - "epoch": 1.502059795965275, - "grad_norm": 15.316435813903809, - "learning_rate": 2.4965670067245416e-05, - "loss": 0.4615, - "step": 169910 - }, - { - "epoch": 1.5021481992255874, - "grad_norm": 2.9901394844055176, - "learning_rate": 2.4964196679573544e-05, - "loss": 0.7241, - "step": 169920 - }, - { - "epoch": 1.5022366024858997, - "grad_norm": 3.714289426803589, - "learning_rate": 2.4962723291901676e-05, - "loss": 0.54, - "step": 169930 - }, - { - "epoch": 1.5023250057462119, - "grad_norm": 1.6649699211120605, - "learning_rate": 2.4961249904229804e-05, - "loss": 0.5663, - "step": 169940 - }, - { - "epoch": 1.5024134090065242, - "grad_norm": 12.405657768249512, - "learning_rate": 2.4959776516557932e-05, - "loss": 0.6212, - "step": 169950 - }, - { - "epoch": 1.5025018122668365, - "grad_norm": 2.0155158042907715, - "learning_rate": 2.4958303128886064e-05, - "loss": 0.5299, - "step": 169960 - }, - { - "epoch": 1.5025902155271487, - "grad_norm": 3.5121774673461914, - "learning_rate": 2.4956829741214192e-05, - "loss": 0.5956, - "step": 169970 - }, - { - "epoch": 1.5026786187874608, - "grad_norm": 4.991064548492432, - "learning_rate": 2.495535635354232e-05, - "loss": 0.646, - "step": 169980 - }, - { - "epoch": 1.5027670220477731, - "grad_norm": 2.6751155853271484, - "learning_rate": 2.495388296587045e-05, - "loss": 0.5725, - "step": 169990 - }, - { - "epoch": 1.5028554253080855, - "grad_norm": 5.514303684234619, - "learning_rate": 2.495240957819858e-05, - "loss": 0.69, - "step": 170000 - }, - { - "epoch": 1.5029438285683976, - "grad_norm": 2.0796127319335938, - "learning_rate": 2.495093619052671e-05, - "loss": 0.6217, - "step": 170010 - }, - { - "epoch": 1.5030322318287097, - "grad_norm": 3.1454861164093018, - "learning_rate": 2.4949462802854838e-05, - "loss": 0.619, - "step": 170020 - }, - { - "epoch": 1.503120635089022, - "grad_norm": 2.3838064670562744, - "learning_rate": 2.4947989415182966e-05, - "loss": 0.72, - "step": 170030 - }, - { - "epoch": 1.5032090383493344, - "grad_norm": 9.640490531921387, - "learning_rate": 2.4946516027511098e-05, - "loss": 0.5371, - "step": 170040 - }, - { - "epoch": 1.5032974416096465, - "grad_norm": 3.0396595001220703, - "learning_rate": 2.4945042639839226e-05, - "loss": 0.614, - "step": 170050 - }, - { - "epoch": 1.5033858448699589, - "grad_norm": 2.500504732131958, - "learning_rate": 2.4943569252167354e-05, - "loss": 0.6032, - "step": 170060 - }, - { - "epoch": 1.5034742481302712, - "grad_norm": 1.6776894330978394, - "learning_rate": 2.4942095864495486e-05, - "loss": 0.5255, - "step": 170070 - }, - { - "epoch": 1.5035626513905833, - "grad_norm": 3.970067024230957, - "learning_rate": 2.4940622476823615e-05, - "loss": 0.7731, - "step": 170080 - }, - { - "epoch": 1.5036510546508954, - "grad_norm": 2.090362787246704, - "learning_rate": 2.4939149089151743e-05, - "loss": 0.4771, - "step": 170090 - }, - { - "epoch": 1.5037394579112078, - "grad_norm": 2.3698582649230957, - "learning_rate": 2.493767570147987e-05, - "loss": 0.744, - "step": 170100 - }, - { - "epoch": 1.5038278611715201, - "grad_norm": 2.4198873043060303, - "learning_rate": 2.4936202313808003e-05, - "loss": 0.4677, - "step": 170110 - }, - { - "epoch": 1.5039162644318322, - "grad_norm": 3.272989511489868, - "learning_rate": 2.493472892613613e-05, - "loss": 0.6182, - "step": 170120 - }, - { - "epoch": 1.5040046676921444, - "grad_norm": 2.344414234161377, - "learning_rate": 2.493325553846426e-05, - "loss": 0.7147, - "step": 170130 - }, - { - "epoch": 1.5040930709524567, - "grad_norm": 14.925524711608887, - "learning_rate": 2.493178215079239e-05, - "loss": 0.6598, - "step": 170140 - }, - { - "epoch": 1.504181474212769, - "grad_norm": 1.4058020114898682, - "learning_rate": 2.493030876312052e-05, - "loss": 0.5053, - "step": 170150 - }, - { - "epoch": 1.5042698774730812, - "grad_norm": 1.8325374126434326, - "learning_rate": 2.4928835375448648e-05, - "loss": 0.4343, - "step": 170160 - }, - { - "epoch": 1.5043582807333933, - "grad_norm": 2.2944347858428955, - "learning_rate": 2.4927361987776776e-05, - "loss": 0.5292, - "step": 170170 - }, - { - "epoch": 1.5044466839937058, - "grad_norm": 1.1431275606155396, - "learning_rate": 2.4925888600104908e-05, - "loss": 0.6514, - "step": 170180 - }, - { - "epoch": 1.504535087254018, - "grad_norm": 2.4520976543426514, - "learning_rate": 2.4924415212433037e-05, - "loss": 0.5479, - "step": 170190 - }, - { - "epoch": 1.50462349051433, - "grad_norm": 2.191321611404419, - "learning_rate": 2.4922941824761165e-05, - "loss": 0.5846, - "step": 170200 - }, - { - "epoch": 1.5047118937746424, - "grad_norm": 5.109443664550781, - "learning_rate": 2.4921468437089293e-05, - "loss": 0.6829, - "step": 170210 - }, - { - "epoch": 1.5048002970349548, - "grad_norm": 1.2909646034240723, - "learning_rate": 2.4919995049417425e-05, - "loss": 0.55, - "step": 170220 - }, - { - "epoch": 1.504888700295267, - "grad_norm": 1.490598201751709, - "learning_rate": 2.4918521661745553e-05, - "loss": 0.6029, - "step": 170230 - }, - { - "epoch": 1.504977103555579, - "grad_norm": 8.929780006408691, - "learning_rate": 2.4917048274073682e-05, - "loss": 0.4641, - "step": 170240 - }, - { - "epoch": 1.5050655068158914, - "grad_norm": 6.032838344573975, - "learning_rate": 2.4915574886401813e-05, - "loss": 0.6392, - "step": 170250 - }, - { - "epoch": 1.5051539100762037, - "grad_norm": 1.4174580574035645, - "learning_rate": 2.4914101498729942e-05, - "loss": 0.5078, - "step": 170260 - }, - { - "epoch": 1.5052423133365158, - "grad_norm": 1.6984152793884277, - "learning_rate": 2.491262811105807e-05, - "loss": 0.6429, - "step": 170270 - }, - { - "epoch": 1.505330716596828, - "grad_norm": 1.713139533996582, - "learning_rate": 2.49111547233862e-05, - "loss": 0.6461, - "step": 170280 - }, - { - "epoch": 1.5054191198571405, - "grad_norm": 19.080305099487305, - "learning_rate": 2.490968133571433e-05, - "loss": 0.7372, - "step": 170290 - }, - { - "epoch": 1.5055075231174526, - "grad_norm": 2.627061128616333, - "learning_rate": 2.490820794804246e-05, - "loss": 0.5528, - "step": 170300 - }, - { - "epoch": 1.5055959263777647, - "grad_norm": 4.7588419914245605, - "learning_rate": 2.4906734560370587e-05, - "loss": 0.6198, - "step": 170310 - }, - { - "epoch": 1.505684329638077, - "grad_norm": 0.9784073829650879, - "learning_rate": 2.4905261172698715e-05, - "loss": 0.423, - "step": 170320 - }, - { - "epoch": 1.5057727328983894, - "grad_norm": 3.155487537384033, - "learning_rate": 2.4903787785026847e-05, - "loss": 0.6677, - "step": 170330 - }, - { - "epoch": 1.5058611361587015, - "grad_norm": 3.9082417488098145, - "learning_rate": 2.4902314397354975e-05, - "loss": 0.5043, - "step": 170340 - }, - { - "epoch": 1.5059495394190137, - "grad_norm": 1.0406138896942139, - "learning_rate": 2.4900841009683104e-05, - "loss": 0.4758, - "step": 170350 - }, - { - "epoch": 1.506037942679326, - "grad_norm": 3.066814661026001, - "learning_rate": 2.4899367622011236e-05, - "loss": 0.5702, - "step": 170360 - }, - { - "epoch": 1.5061263459396383, - "grad_norm": 6.921902656555176, - "learning_rate": 2.4897894234339364e-05, - "loss": 0.6582, - "step": 170370 - }, - { - "epoch": 1.5062147491999505, - "grad_norm": 6.363106727600098, - "learning_rate": 2.4896420846667492e-05, - "loss": 0.754, - "step": 170380 - }, - { - "epoch": 1.5063031524602626, - "grad_norm": 9.011924743652344, - "learning_rate": 2.489494745899562e-05, - "loss": 0.5578, - "step": 170390 - }, - { - "epoch": 1.506391555720575, - "grad_norm": 2.6091227531433105, - "learning_rate": 2.4893474071323752e-05, - "loss": 0.6223, - "step": 170400 - }, - { - "epoch": 1.5064799589808873, - "grad_norm": 10.137401580810547, - "learning_rate": 2.489200068365188e-05, - "loss": 0.5603, - "step": 170410 - }, - { - "epoch": 1.5065683622411994, - "grad_norm": 5.111581325531006, - "learning_rate": 2.489052729598001e-05, - "loss": 0.5112, - "step": 170420 - }, - { - "epoch": 1.5066567655015117, - "grad_norm": 1.9903699159622192, - "learning_rate": 2.488905390830814e-05, - "loss": 0.5632, - "step": 170430 - }, - { - "epoch": 1.506745168761824, - "grad_norm": 2.1301333904266357, - "learning_rate": 2.488758052063627e-05, - "loss": 0.6118, - "step": 170440 - }, - { - "epoch": 1.5068335720221362, - "grad_norm": 2.1307950019836426, - "learning_rate": 2.4886107132964397e-05, - "loss": 0.6598, - "step": 170450 - }, - { - "epoch": 1.5069219752824483, - "grad_norm": 2.33685302734375, - "learning_rate": 2.4884633745292526e-05, - "loss": 0.5392, - "step": 170460 - }, - { - "epoch": 1.5070103785427607, - "grad_norm": 0.938329815864563, - "learning_rate": 2.4883160357620658e-05, - "loss": 0.533, - "step": 170470 - }, - { - "epoch": 1.507098781803073, - "grad_norm": 5.397551536560059, - "learning_rate": 2.4881686969948786e-05, - "loss": 0.6804, - "step": 170480 - }, - { - "epoch": 1.5071871850633851, - "grad_norm": 5.188610076904297, - "learning_rate": 2.4880213582276914e-05, - "loss": 0.6573, - "step": 170490 - }, - { - "epoch": 1.5072755883236972, - "grad_norm": 1.6878573894500732, - "learning_rate": 2.4878740194605043e-05, - "loss": 0.5312, - "step": 170500 - }, - { - "epoch": 1.5073639915840096, - "grad_norm": 1.3941048383712769, - "learning_rate": 2.4877266806933174e-05, - "loss": 0.6473, - "step": 170510 - }, - { - "epoch": 1.507452394844322, - "grad_norm": 4.4079718589782715, - "learning_rate": 2.4875793419261303e-05, - "loss": 0.7942, - "step": 170520 - }, - { - "epoch": 1.507540798104634, - "grad_norm": 1.4802908897399902, - "learning_rate": 2.487432003158943e-05, - "loss": 0.576, - "step": 170530 - }, - { - "epoch": 1.5076292013649464, - "grad_norm": 1.521902322769165, - "learning_rate": 2.4872846643917563e-05, - "loss": 0.5962, - "step": 170540 - }, - { - "epoch": 1.5077176046252587, - "grad_norm": 2.56095290184021, - "learning_rate": 2.487137325624569e-05, - "loss": 0.5965, - "step": 170550 - }, - { - "epoch": 1.5078060078855708, - "grad_norm": 2.7927162647247314, - "learning_rate": 2.486989986857382e-05, - "loss": 0.558, - "step": 170560 - }, - { - "epoch": 1.507894411145883, - "grad_norm": 1.9829643964767456, - "learning_rate": 2.4868426480901948e-05, - "loss": 0.5682, - "step": 170570 - }, - { - "epoch": 1.5079828144061953, - "grad_norm": 1.9201500415802002, - "learning_rate": 2.486695309323008e-05, - "loss": 0.7038, - "step": 170580 - }, - { - "epoch": 1.5080712176665076, - "grad_norm": 8.51537799835205, - "learning_rate": 2.4865479705558208e-05, - "loss": 0.6354, - "step": 170590 - }, - { - "epoch": 1.5081596209268198, - "grad_norm": 2.365753412246704, - "learning_rate": 2.4864006317886336e-05, - "loss": 0.5691, - "step": 170600 - }, - { - "epoch": 1.5082480241871319, - "grad_norm": 4.4125823974609375, - "learning_rate": 2.4862532930214468e-05, - "loss": 0.5021, - "step": 170610 - }, - { - "epoch": 1.5083364274474442, - "grad_norm": 2.620122194290161, - "learning_rate": 2.4861059542542596e-05, - "loss": 0.6237, - "step": 170620 - }, - { - "epoch": 1.5084248307077566, - "grad_norm": 14.48987865447998, - "learning_rate": 2.4859586154870725e-05, - "loss": 0.5036, - "step": 170630 - }, - { - "epoch": 1.5085132339680687, - "grad_norm": 2.271885395050049, - "learning_rate": 2.4858112767198853e-05, - "loss": 0.6153, - "step": 170640 - }, - { - "epoch": 1.508601637228381, - "grad_norm": 13.027172088623047, - "learning_rate": 2.4856639379526985e-05, - "loss": 0.5818, - "step": 170650 - }, - { - "epoch": 1.5086900404886934, - "grad_norm": 5.975130081176758, - "learning_rate": 2.4855165991855113e-05, - "loss": 0.4836, - "step": 170660 - }, - { - "epoch": 1.5087784437490055, - "grad_norm": 1.2923355102539062, - "learning_rate": 2.485369260418324e-05, - "loss": 0.6044, - "step": 170670 - }, - { - "epoch": 1.5088668470093176, - "grad_norm": 4.037400245666504, - "learning_rate": 2.485221921651137e-05, - "loss": 0.6181, - "step": 170680 - }, - { - "epoch": 1.50895525026963, - "grad_norm": 4.225475311279297, - "learning_rate": 2.4850745828839502e-05, - "loss": 0.5584, - "step": 170690 - }, - { - "epoch": 1.5090436535299423, - "grad_norm": 1.4339970350265503, - "learning_rate": 2.484927244116763e-05, - "loss": 0.4635, - "step": 170700 - }, - { - "epoch": 1.5091320567902544, - "grad_norm": 2.196194887161255, - "learning_rate": 2.484779905349576e-05, - "loss": 0.5799, - "step": 170710 - }, - { - "epoch": 1.5092204600505665, - "grad_norm": 10.085076332092285, - "learning_rate": 2.484632566582389e-05, - "loss": 0.6642, - "step": 170720 - }, - { - "epoch": 1.5093088633108789, - "grad_norm": 3.725231409072876, - "learning_rate": 2.484485227815202e-05, - "loss": 0.5709, - "step": 170730 - }, - { - "epoch": 1.5093972665711912, - "grad_norm": 3.753584146499634, - "learning_rate": 2.4843378890480147e-05, - "loss": 0.6377, - "step": 170740 - }, - { - "epoch": 1.5094856698315033, - "grad_norm": 3.035900831222534, - "learning_rate": 2.484190550280828e-05, - "loss": 0.5406, - "step": 170750 - }, - { - "epoch": 1.5095740730918155, - "grad_norm": 2.5373897552490234, - "learning_rate": 2.4840432115136407e-05, - "loss": 0.526, - "step": 170760 - }, - { - "epoch": 1.509662476352128, - "grad_norm": 1.967418909072876, - "learning_rate": 2.4838958727464535e-05, - "loss": 0.5389, - "step": 170770 - }, - { - "epoch": 1.5097508796124401, - "grad_norm": 0.7998499870300293, - "learning_rate": 2.4837485339792667e-05, - "loss": 0.6189, - "step": 170780 - }, - { - "epoch": 1.5098392828727523, - "grad_norm": 1.803086519241333, - "learning_rate": 2.4836011952120795e-05, - "loss": 0.4854, - "step": 170790 - }, - { - "epoch": 1.5099276861330646, - "grad_norm": 2.995997190475464, - "learning_rate": 2.4834538564448924e-05, - "loss": 0.8179, - "step": 170800 - }, - { - "epoch": 1.510016089393377, - "grad_norm": 6.703754901885986, - "learning_rate": 2.4833065176777056e-05, - "loss": 0.5792, - "step": 170810 - }, - { - "epoch": 1.510104492653689, - "grad_norm": 2.998743772506714, - "learning_rate": 2.4831591789105184e-05, - "loss": 0.6407, - "step": 170820 - }, - { - "epoch": 1.5101928959140012, - "grad_norm": 1.580256462097168, - "learning_rate": 2.4830118401433312e-05, - "loss": 0.5288, - "step": 170830 - }, - { - "epoch": 1.5102812991743135, - "grad_norm": 2.933837652206421, - "learning_rate": 2.4828645013761444e-05, - "loss": 0.5742, - "step": 170840 - }, - { - "epoch": 1.5103697024346259, - "grad_norm": 1.6946322917938232, - "learning_rate": 2.4827171626089572e-05, - "loss": 0.5555, - "step": 170850 - }, - { - "epoch": 1.510458105694938, - "grad_norm": 1.2750314474105835, - "learning_rate": 2.48256982384177e-05, - "loss": 0.4955, - "step": 170860 - }, - { - "epoch": 1.5105465089552501, - "grad_norm": 10.534096717834473, - "learning_rate": 2.4824224850745832e-05, - "loss": 0.4817, - "step": 170870 - }, - { - "epoch": 1.5106349122155627, - "grad_norm": 1.5416467189788818, - "learning_rate": 2.482275146307396e-05, - "loss": 0.5073, - "step": 170880 - }, - { - "epoch": 1.5107233154758748, - "grad_norm": 1.9794907569885254, - "learning_rate": 2.482127807540209e-05, - "loss": 0.5094, - "step": 170890 - }, - { - "epoch": 1.510811718736187, - "grad_norm": 3.277860403060913, - "learning_rate": 2.481980468773022e-05, - "loss": 0.5946, - "step": 170900 - }, - { - "epoch": 1.5109001219964993, - "grad_norm": 8.792659759521484, - "learning_rate": 2.481833130005835e-05, - "loss": 0.6764, - "step": 170910 - }, - { - "epoch": 1.5109885252568116, - "grad_norm": 1.4864585399627686, - "learning_rate": 2.4816857912386478e-05, - "loss": 0.528, - "step": 170920 - }, - { - "epoch": 1.5110769285171237, - "grad_norm": 2.291426420211792, - "learning_rate": 2.4815384524714606e-05, - "loss": 0.6346, - "step": 170930 - }, - { - "epoch": 1.5111653317774358, - "grad_norm": 5.386637210845947, - "learning_rate": 2.4813911137042738e-05, - "loss": 0.5311, - "step": 170940 - }, - { - "epoch": 1.5112537350377482, - "grad_norm": 8.516998291015625, - "learning_rate": 2.4812437749370866e-05, - "loss": 0.5453, - "step": 170950 - }, - { - "epoch": 1.5113421382980605, - "grad_norm": 7.321317672729492, - "learning_rate": 2.4810964361698994e-05, - "loss": 0.5791, - "step": 170960 - }, - { - "epoch": 1.5114305415583726, - "grad_norm": 2.563375949859619, - "learning_rate": 2.4809490974027123e-05, - "loss": 0.6834, - "step": 170970 - }, - { - "epoch": 1.5115189448186848, - "grad_norm": 22.751176834106445, - "learning_rate": 2.4808017586355254e-05, - "loss": 0.5943, - "step": 170980 - }, - { - "epoch": 1.511607348078997, - "grad_norm": 3.201568603515625, - "learning_rate": 2.4806544198683383e-05, - "loss": 0.5685, - "step": 170990 - }, - { - "epoch": 1.5116957513393094, - "grad_norm": 1.581493854522705, - "learning_rate": 2.480507081101151e-05, - "loss": 0.6274, - "step": 171000 - }, - { - "epoch": 1.5117841545996216, - "grad_norm": 1.1330076456069946, - "learning_rate": 2.4803597423339643e-05, - "loss": 0.4908, - "step": 171010 - }, - { - "epoch": 1.511872557859934, - "grad_norm": 1.00552499294281, - "learning_rate": 2.480212403566777e-05, - "loss": 0.426, - "step": 171020 - }, - { - "epoch": 1.5119609611202462, - "grad_norm": 6.762635231018066, - "learning_rate": 2.48006506479959e-05, - "loss": 0.498, - "step": 171030 - }, - { - "epoch": 1.5120493643805584, - "grad_norm": 8.788949966430664, - "learning_rate": 2.4799177260324028e-05, - "loss": 0.521, - "step": 171040 - }, - { - "epoch": 1.5121377676408705, - "grad_norm": 1.3053117990493774, - "learning_rate": 2.479770387265216e-05, - "loss": 0.6146, - "step": 171050 - }, - { - "epoch": 1.5122261709011828, - "grad_norm": 13.430984497070312, - "learning_rate": 2.4796230484980288e-05, - "loss": 0.6675, - "step": 171060 - }, - { - "epoch": 1.5123145741614952, - "grad_norm": 2.0273873805999756, - "learning_rate": 2.4794757097308416e-05, - "loss": 0.6122, - "step": 171070 - }, - { - "epoch": 1.5124029774218073, - "grad_norm": 2.3378665447235107, - "learning_rate": 2.4793283709636548e-05, - "loss": 0.5783, - "step": 171080 - }, - { - "epoch": 1.5124913806821194, - "grad_norm": 8.47735595703125, - "learning_rate": 2.4791810321964677e-05, - "loss": 0.5486, - "step": 171090 - }, - { - "epoch": 1.5125797839424318, - "grad_norm": 2.7977492809295654, - "learning_rate": 2.4790336934292805e-05, - "loss": 0.6106, - "step": 171100 - }, - { - "epoch": 1.512668187202744, - "grad_norm": 13.27365779876709, - "learning_rate": 2.4788863546620933e-05, - "loss": 0.695, - "step": 171110 - }, - { - "epoch": 1.5127565904630562, - "grad_norm": 15.970056533813477, - "learning_rate": 2.4787390158949065e-05, - "loss": 0.6467, - "step": 171120 - }, - { - "epoch": 1.5128449937233686, - "grad_norm": 1.6792930364608765, - "learning_rate": 2.4785916771277193e-05, - "loss": 0.5215, - "step": 171130 - }, - { - "epoch": 1.512933396983681, - "grad_norm": 9.103386878967285, - "learning_rate": 2.478444338360532e-05, - "loss": 0.6298, - "step": 171140 - }, - { - "epoch": 1.513021800243993, - "grad_norm": 6.686374187469482, - "learning_rate": 2.478296999593345e-05, - "loss": 0.4883, - "step": 171150 - }, - { - "epoch": 1.5131102035043051, - "grad_norm": 1.2885302305221558, - "learning_rate": 2.4781496608261582e-05, - "loss": 0.6489, - "step": 171160 - }, - { - "epoch": 1.5131986067646175, - "grad_norm": 2.2273309230804443, - "learning_rate": 2.478002322058971e-05, - "loss": 0.608, - "step": 171170 - }, - { - "epoch": 1.5132870100249298, - "grad_norm": 1.2896255254745483, - "learning_rate": 2.477854983291784e-05, - "loss": 0.5715, - "step": 171180 - }, - { - "epoch": 1.513375413285242, - "grad_norm": 1.0402296781539917, - "learning_rate": 2.477707644524597e-05, - "loss": 0.4456, - "step": 171190 - }, - { - "epoch": 1.513463816545554, - "grad_norm": 2.567488193511963, - "learning_rate": 2.47756030575741e-05, - "loss": 0.5819, - "step": 171200 - }, - { - "epoch": 1.5135522198058664, - "grad_norm": 1.212886095046997, - "learning_rate": 2.4774129669902227e-05, - "loss": 0.4918, - "step": 171210 - }, - { - "epoch": 1.5136406230661787, - "grad_norm": 3.427006244659424, - "learning_rate": 2.4772656282230355e-05, - "loss": 0.5295, - "step": 171220 - }, - { - "epoch": 1.5137290263264909, - "grad_norm": 1.9608615636825562, - "learning_rate": 2.4771182894558487e-05, - "loss": 0.56, - "step": 171230 - }, - { - "epoch": 1.5138174295868032, - "grad_norm": 2.8133926391601562, - "learning_rate": 2.4769709506886615e-05, - "loss": 0.573, - "step": 171240 - }, - { - "epoch": 1.5139058328471156, - "grad_norm": 1.1024353504180908, - "learning_rate": 2.4768236119214744e-05, - "loss": 0.645, - "step": 171250 - }, - { - "epoch": 1.5139942361074277, - "grad_norm": 2.839495897293091, - "learning_rate": 2.4766762731542875e-05, - "loss": 0.5933, - "step": 171260 - }, - { - "epoch": 1.5140826393677398, - "grad_norm": 7.581789970397949, - "learning_rate": 2.4765289343871004e-05, - "loss": 0.6872, - "step": 171270 - }, - { - "epoch": 1.5141710426280521, - "grad_norm": 2.31018328666687, - "learning_rate": 2.4763815956199132e-05, - "loss": 0.5183, - "step": 171280 - }, - { - "epoch": 1.5142594458883645, - "grad_norm": 1.6008466482162476, - "learning_rate": 2.476234256852726e-05, - "loss": 0.5156, - "step": 171290 - }, - { - "epoch": 1.5143478491486766, - "grad_norm": 4.402003288269043, - "learning_rate": 2.4760869180855392e-05, - "loss": 0.6073, - "step": 171300 - }, - { - "epoch": 1.5144362524089887, - "grad_norm": 1.1848864555358887, - "learning_rate": 2.475939579318352e-05, - "loss": 0.559, - "step": 171310 - }, - { - "epoch": 1.514524655669301, - "grad_norm": 3.13996958732605, - "learning_rate": 2.475792240551165e-05, - "loss": 0.7112, - "step": 171320 - }, - { - "epoch": 1.5146130589296134, - "grad_norm": 4.38145637512207, - "learning_rate": 2.4756449017839777e-05, - "loss": 0.6513, - "step": 171330 - }, - { - "epoch": 1.5147014621899255, - "grad_norm": 1.2336317300796509, - "learning_rate": 2.475497563016791e-05, - "loss": 0.6886, - "step": 171340 - }, - { - "epoch": 1.5147898654502379, - "grad_norm": 0.7369675636291504, - "learning_rate": 2.4753502242496037e-05, - "loss": 0.5116, - "step": 171350 - }, - { - "epoch": 1.5148782687105502, - "grad_norm": 0.9784601330757141, - "learning_rate": 2.4752028854824166e-05, - "loss": 0.5885, - "step": 171360 - }, - { - "epoch": 1.5149666719708623, - "grad_norm": 2.509460210800171, - "learning_rate": 2.4750555467152298e-05, - "loss": 0.6568, - "step": 171370 - }, - { - "epoch": 1.5150550752311744, - "grad_norm": 2.559572696685791, - "learning_rate": 2.4749082079480426e-05, - "loss": 0.5385, - "step": 171380 - }, - { - "epoch": 1.5151434784914868, - "grad_norm": 4.249107837677002, - "learning_rate": 2.4747608691808554e-05, - "loss": 0.6195, - "step": 171390 - }, - { - "epoch": 1.5152318817517991, - "grad_norm": 0.9475095272064209, - "learning_rate": 2.4746135304136683e-05, - "loss": 0.5925, - "step": 171400 - }, - { - "epoch": 1.5153202850121112, - "grad_norm": 1.9829034805297852, - "learning_rate": 2.4744661916464814e-05, - "loss": 0.6953, - "step": 171410 - }, - { - "epoch": 1.5154086882724234, - "grad_norm": 3.5664305686950684, - "learning_rate": 2.4743188528792943e-05, - "loss": 0.5652, - "step": 171420 - }, - { - "epoch": 1.5154970915327357, - "grad_norm": 6.073277950286865, - "learning_rate": 2.474171514112107e-05, - "loss": 0.6489, - "step": 171430 - }, - { - "epoch": 1.515585494793048, - "grad_norm": 1.3370921611785889, - "learning_rate": 2.47402417534492e-05, - "loss": 0.5475, - "step": 171440 - }, - { - "epoch": 1.5156738980533602, - "grad_norm": 2.5059022903442383, - "learning_rate": 2.473876836577733e-05, - "loss": 0.547, - "step": 171450 - }, - { - "epoch": 1.5157623013136723, - "grad_norm": 1.5488955974578857, - "learning_rate": 2.473729497810546e-05, - "loss": 0.6194, - "step": 171460 - }, - { - "epoch": 1.5158507045739849, - "grad_norm": 3.342101812362671, - "learning_rate": 2.4735821590433588e-05, - "loss": 0.6635, - "step": 171470 - }, - { - "epoch": 1.515939107834297, - "grad_norm": 3.384246349334717, - "learning_rate": 2.473434820276172e-05, - "loss": 0.5832, - "step": 171480 - }, - { - "epoch": 1.516027511094609, - "grad_norm": 2.568795919418335, - "learning_rate": 2.4732874815089848e-05, - "loss": 0.4513, - "step": 171490 - }, - { - "epoch": 1.5161159143549214, - "grad_norm": 3.3297486305236816, - "learning_rate": 2.4731401427417976e-05, - "loss": 0.7692, - "step": 171500 - }, - { - "epoch": 1.5162043176152338, - "grad_norm": 3.179825782775879, - "learning_rate": 2.4729928039746105e-05, - "loss": 0.6671, - "step": 171510 - }, - { - "epoch": 1.516292720875546, - "grad_norm": 1.2892937660217285, - "learning_rate": 2.4728454652074236e-05, - "loss": 0.5824, - "step": 171520 - }, - { - "epoch": 1.516381124135858, - "grad_norm": 1.6109046936035156, - "learning_rate": 2.4726981264402365e-05, - "loss": 0.6611, - "step": 171530 - }, - { - "epoch": 1.5164695273961704, - "grad_norm": 2.1983642578125, - "learning_rate": 2.4725507876730493e-05, - "loss": 0.6104, - "step": 171540 - }, - { - "epoch": 1.5165579306564827, - "grad_norm": 9.347017288208008, - "learning_rate": 2.4724034489058625e-05, - "loss": 0.4932, - "step": 171550 - }, - { - "epoch": 1.5166463339167948, - "grad_norm": 3.3557119369506836, - "learning_rate": 2.4722561101386753e-05, - "loss": 0.7232, - "step": 171560 - }, - { - "epoch": 1.516734737177107, - "grad_norm": 2.6021599769592285, - "learning_rate": 2.472108771371488e-05, - "loss": 0.5958, - "step": 171570 - }, - { - "epoch": 1.5168231404374193, - "grad_norm": 3.322150707244873, - "learning_rate": 2.471961432604301e-05, - "loss": 0.6378, - "step": 171580 - }, - { - "epoch": 1.5169115436977316, - "grad_norm": 0.9610885977745056, - "learning_rate": 2.471814093837114e-05, - "loss": 0.5144, - "step": 171590 - }, - { - "epoch": 1.5169999469580437, - "grad_norm": 4.658006191253662, - "learning_rate": 2.471666755069927e-05, - "loss": 0.5929, - "step": 171600 - }, - { - "epoch": 1.517088350218356, - "grad_norm": 4.58236837387085, - "learning_rate": 2.47151941630274e-05, - "loss": 0.6988, - "step": 171610 - }, - { - "epoch": 1.5171767534786684, - "grad_norm": 6.1837687492370605, - "learning_rate": 2.4713720775355527e-05, - "loss": 0.614, - "step": 171620 - }, - { - "epoch": 1.5172651567389805, - "grad_norm": 2.1432061195373535, - "learning_rate": 2.471224738768366e-05, - "loss": 0.5013, - "step": 171630 - }, - { - "epoch": 1.5173535599992927, - "grad_norm": 2.070702075958252, - "learning_rate": 2.4710774000011787e-05, - "loss": 0.6305, - "step": 171640 - }, - { - "epoch": 1.517441963259605, - "grad_norm": 1.5412945747375488, - "learning_rate": 2.4709300612339915e-05, - "loss": 0.6614, - "step": 171650 - }, - { - "epoch": 1.5175303665199174, - "grad_norm": 2.47554349899292, - "learning_rate": 2.4707827224668047e-05, - "loss": 0.5223, - "step": 171660 - }, - { - "epoch": 1.5176187697802295, - "grad_norm": 2.456794261932373, - "learning_rate": 2.4706353836996175e-05, - "loss": 0.6242, - "step": 171670 - }, - { - "epoch": 1.5177071730405416, - "grad_norm": 1.1966077089309692, - "learning_rate": 2.4704880449324304e-05, - "loss": 0.5622, - "step": 171680 - }, - { - "epoch": 1.517795576300854, - "grad_norm": 3.458970069885254, - "learning_rate": 2.4703407061652435e-05, - "loss": 0.564, - "step": 171690 - }, - { - "epoch": 1.5178839795611663, - "grad_norm": 2.3334169387817383, - "learning_rate": 2.4701933673980564e-05, - "loss": 0.5938, - "step": 171700 - }, - { - "epoch": 1.5179723828214784, - "grad_norm": 1.6035447120666504, - "learning_rate": 2.4700460286308692e-05, - "loss": 0.5376, - "step": 171710 - }, - { - "epoch": 1.5180607860817907, - "grad_norm": 1.7743476629257202, - "learning_rate": 2.4698986898636824e-05, - "loss": 0.5416, - "step": 171720 - }, - { - "epoch": 1.518149189342103, - "grad_norm": 3.096792697906494, - "learning_rate": 2.4697513510964952e-05, - "loss": 0.6403, - "step": 171730 - }, - { - "epoch": 1.5182375926024152, - "grad_norm": 1.065329909324646, - "learning_rate": 2.469604012329308e-05, - "loss": 0.572, - "step": 171740 - }, - { - "epoch": 1.5183259958627273, - "grad_norm": 2.8391494750976562, - "learning_rate": 2.4694566735621212e-05, - "loss": 0.7039, - "step": 171750 - }, - { - "epoch": 1.5184143991230397, - "grad_norm": 2.3468706607818604, - "learning_rate": 2.469309334794934e-05, - "loss": 0.6517, - "step": 171760 - }, - { - "epoch": 1.518502802383352, - "grad_norm": 3.8764102458953857, - "learning_rate": 2.469161996027747e-05, - "loss": 0.5581, - "step": 171770 - }, - { - "epoch": 1.5185912056436641, - "grad_norm": 1.846274495124817, - "learning_rate": 2.46901465726056e-05, - "loss": 0.7371, - "step": 171780 - }, - { - "epoch": 1.5186796089039762, - "grad_norm": 8.039772987365723, - "learning_rate": 2.468867318493373e-05, - "loss": 0.6233, - "step": 171790 - }, - { - "epoch": 1.5187680121642886, - "grad_norm": 2.1583986282348633, - "learning_rate": 2.4687199797261857e-05, - "loss": 0.582, - "step": 171800 - }, - { - "epoch": 1.518856415424601, - "grad_norm": 3.0295395851135254, - "learning_rate": 2.468572640958999e-05, - "loss": 0.5288, - "step": 171810 - }, - { - "epoch": 1.518944818684913, - "grad_norm": 7.18132209777832, - "learning_rate": 2.4684253021918118e-05, - "loss": 0.6494, - "step": 171820 - }, - { - "epoch": 1.5190332219452254, - "grad_norm": 6.014684200286865, - "learning_rate": 2.4682779634246246e-05, - "loss": 0.5979, - "step": 171830 - }, - { - "epoch": 1.5191216252055377, - "grad_norm": 1.6961146593093872, - "learning_rate": 2.4681306246574378e-05, - "loss": 0.5575, - "step": 171840 - }, - { - "epoch": 1.5192100284658498, - "grad_norm": 1.6369926929473877, - "learning_rate": 2.4679832858902506e-05, - "loss": 0.563, - "step": 171850 - }, - { - "epoch": 1.519298431726162, - "grad_norm": 1.037829041481018, - "learning_rate": 2.4678359471230634e-05, - "loss": 0.5281, - "step": 171860 - }, - { - "epoch": 1.5193868349864743, - "grad_norm": 1.8316482305526733, - "learning_rate": 2.4676886083558763e-05, - "loss": 0.5275, - "step": 171870 - }, - { - "epoch": 1.5194752382467867, - "grad_norm": 3.0014376640319824, - "learning_rate": 2.4675412695886894e-05, - "loss": 0.6327, - "step": 171880 - }, - { - "epoch": 1.5195636415070988, - "grad_norm": 1.9669630527496338, - "learning_rate": 2.4673939308215023e-05, - "loss": 0.4468, - "step": 171890 - }, - { - "epoch": 1.519652044767411, - "grad_norm": 4.4915595054626465, - "learning_rate": 2.467246592054315e-05, - "loss": 0.5648, - "step": 171900 - }, - { - "epoch": 1.5197404480277232, - "grad_norm": 1.330744743347168, - "learning_rate": 2.467099253287128e-05, - "loss": 0.5694, - "step": 171910 - }, - { - "epoch": 1.5198288512880356, - "grad_norm": 3.722687005996704, - "learning_rate": 2.466951914519941e-05, - "loss": 0.5921, - "step": 171920 - }, - { - "epoch": 1.5199172545483477, - "grad_norm": 1.5006351470947266, - "learning_rate": 2.466804575752754e-05, - "loss": 0.6337, - "step": 171930 - }, - { - "epoch": 1.52000565780866, - "grad_norm": 4.495100498199463, - "learning_rate": 2.4666572369855668e-05, - "loss": 0.7132, - "step": 171940 - }, - { - "epoch": 1.5200940610689724, - "grad_norm": 6.383693218231201, - "learning_rate": 2.46650989821838e-05, - "loss": 0.5993, - "step": 171950 - }, - { - "epoch": 1.5201824643292845, - "grad_norm": 1.008862018585205, - "learning_rate": 2.4663625594511928e-05, - "loss": 0.6129, - "step": 171960 - }, - { - "epoch": 1.5202708675895966, - "grad_norm": 2.4121975898742676, - "learning_rate": 2.4662152206840056e-05, - "loss": 0.7452, - "step": 171970 - }, - { - "epoch": 1.520359270849909, - "grad_norm": 1.813094973564148, - "learning_rate": 2.4660678819168185e-05, - "loss": 0.5145, - "step": 171980 - }, - { - "epoch": 1.5204476741102213, - "grad_norm": 1.5578382015228271, - "learning_rate": 2.4659205431496316e-05, - "loss": 0.6159, - "step": 171990 - }, - { - "epoch": 1.5205360773705334, - "grad_norm": 1.8867958784103394, - "learning_rate": 2.4657732043824445e-05, - "loss": 0.485, - "step": 172000 - }, - { - "epoch": 1.5206244806308455, - "grad_norm": 1.0533500909805298, - "learning_rate": 2.4656258656152573e-05, - "loss": 0.6231, - "step": 172010 - }, - { - "epoch": 1.5207128838911579, - "grad_norm": 1.967441439628601, - "learning_rate": 2.4654785268480705e-05, - "loss": 0.6675, - "step": 172020 - }, - { - "epoch": 1.5208012871514702, - "grad_norm": 0.9737207889556885, - "learning_rate": 2.4653311880808833e-05, - "loss": 0.4126, - "step": 172030 - }, - { - "epoch": 1.5208896904117823, - "grad_norm": 2.703584909439087, - "learning_rate": 2.465183849313696e-05, - "loss": 0.4827, - "step": 172040 - }, - { - "epoch": 1.5209780936720945, - "grad_norm": 4.685316562652588, - "learning_rate": 2.465036510546509e-05, - "loss": 0.7228, - "step": 172050 - }, - { - "epoch": 1.521066496932407, - "grad_norm": 2.4175374507904053, - "learning_rate": 2.4648891717793222e-05, - "loss": 0.5838, - "step": 172060 - }, - { - "epoch": 1.5211549001927192, - "grad_norm": 9.572955131530762, - "learning_rate": 2.464741833012135e-05, - "loss": 0.5546, - "step": 172070 - }, - { - "epoch": 1.5212433034530313, - "grad_norm": 10.905763626098633, - "learning_rate": 2.464594494244948e-05, - "loss": 0.6809, - "step": 172080 - }, - { - "epoch": 1.5213317067133436, - "grad_norm": 1.4791529178619385, - "learning_rate": 2.4644471554777607e-05, - "loss": 0.5037, - "step": 172090 - }, - { - "epoch": 1.521420109973656, - "grad_norm": 15.26163101196289, - "learning_rate": 2.464299816710574e-05, - "loss": 0.5762, - "step": 172100 - }, - { - "epoch": 1.521508513233968, - "grad_norm": 1.4081157445907593, - "learning_rate": 2.4641524779433867e-05, - "loss": 0.5283, - "step": 172110 - }, - { - "epoch": 1.5215969164942802, - "grad_norm": 1.4823800325393677, - "learning_rate": 2.4640051391761995e-05, - "loss": 0.603, - "step": 172120 - }, - { - "epoch": 1.5216853197545925, - "grad_norm": 1.1264511346817017, - "learning_rate": 2.4638578004090127e-05, - "loss": 0.6241, - "step": 172130 - }, - { - "epoch": 1.5217737230149049, - "grad_norm": 3.080552816390991, - "learning_rate": 2.4637104616418255e-05, - "loss": 0.6103, - "step": 172140 - }, - { - "epoch": 1.521862126275217, - "grad_norm": 8.09986686706543, - "learning_rate": 2.4635631228746384e-05, - "loss": 0.6419, - "step": 172150 - }, - { - "epoch": 1.5219505295355291, - "grad_norm": 2.6569294929504395, - "learning_rate": 2.4634157841074512e-05, - "loss": 0.6264, - "step": 172160 - }, - { - "epoch": 1.5220389327958415, - "grad_norm": 3.802151679992676, - "learning_rate": 2.4632684453402644e-05, - "loss": 0.5923, - "step": 172170 - }, - { - "epoch": 1.5221273360561538, - "grad_norm": 5.553664684295654, - "learning_rate": 2.4631211065730772e-05, - "loss": 0.7305, - "step": 172180 - }, - { - "epoch": 1.522215739316466, - "grad_norm": 2.139594554901123, - "learning_rate": 2.46297376780589e-05, - "loss": 0.5906, - "step": 172190 - }, - { - "epoch": 1.5223041425767783, - "grad_norm": 2.169487476348877, - "learning_rate": 2.4628264290387032e-05, - "loss": 0.4848, - "step": 172200 - }, - { - "epoch": 1.5223925458370906, - "grad_norm": 1.6875172853469849, - "learning_rate": 2.462679090271516e-05, - "loss": 0.6831, - "step": 172210 - }, - { - "epoch": 1.5224809490974027, - "grad_norm": 2.2224791049957275, - "learning_rate": 2.462531751504329e-05, - "loss": 0.5264, - "step": 172220 - }, - { - "epoch": 1.5225693523577148, - "grad_norm": 1.4047622680664062, - "learning_rate": 2.4623844127371417e-05, - "loss": 0.6143, - "step": 172230 - }, - { - "epoch": 1.5226577556180272, - "grad_norm": 1.6402615308761597, - "learning_rate": 2.462237073969955e-05, - "loss": 0.5274, - "step": 172240 - }, - { - "epoch": 1.5227461588783395, - "grad_norm": 2.0935678482055664, - "learning_rate": 2.4620897352027677e-05, - "loss": 0.6121, - "step": 172250 - }, - { - "epoch": 1.5228345621386516, - "grad_norm": 1.708573579788208, - "learning_rate": 2.4619423964355806e-05, - "loss": 0.525, - "step": 172260 - }, - { - "epoch": 1.5229229653989638, - "grad_norm": 2.7750730514526367, - "learning_rate": 2.4617950576683934e-05, - "loss": 0.6079, - "step": 172270 - }, - { - "epoch": 1.523011368659276, - "grad_norm": 5.222714424133301, - "learning_rate": 2.4616477189012066e-05, - "loss": 0.6303, - "step": 172280 - }, - { - "epoch": 1.5230997719195885, - "grad_norm": 3.571319818496704, - "learning_rate": 2.4615003801340194e-05, - "loss": 0.6871, - "step": 172290 - }, - { - "epoch": 1.5231881751799006, - "grad_norm": 6.444335460662842, - "learning_rate": 2.4613530413668323e-05, - "loss": 0.7221, - "step": 172300 - }, - { - "epoch": 1.523276578440213, - "grad_norm": 3.561591625213623, - "learning_rate": 2.4612057025996454e-05, - "loss": 0.5972, - "step": 172310 - }, - { - "epoch": 1.5233649817005253, - "grad_norm": 2.5546274185180664, - "learning_rate": 2.4610583638324583e-05, - "loss": 0.5113, - "step": 172320 - }, - { - "epoch": 1.5234533849608374, - "grad_norm": 1.630532145500183, - "learning_rate": 2.460911025065271e-05, - "loss": 0.6621, - "step": 172330 - }, - { - "epoch": 1.5235417882211495, - "grad_norm": 2.120156764984131, - "learning_rate": 2.460763686298084e-05, - "loss": 0.5563, - "step": 172340 - }, - { - "epoch": 1.5236301914814618, - "grad_norm": 2.6163783073425293, - "learning_rate": 2.460616347530897e-05, - "loss": 0.6366, - "step": 172350 - }, - { - "epoch": 1.5237185947417742, - "grad_norm": 3.093794345855713, - "learning_rate": 2.46046900876371e-05, - "loss": 0.6206, - "step": 172360 - }, - { - "epoch": 1.5238069980020863, - "grad_norm": 2.097590208053589, - "learning_rate": 2.4603216699965228e-05, - "loss": 0.6025, - "step": 172370 - }, - { - "epoch": 1.5238954012623984, - "grad_norm": 6.869895935058594, - "learning_rate": 2.460174331229336e-05, - "loss": 0.6307, - "step": 172380 - }, - { - "epoch": 1.5239838045227108, - "grad_norm": 1.4451054334640503, - "learning_rate": 2.4600269924621488e-05, - "loss": 0.538, - "step": 172390 - }, - { - "epoch": 1.524072207783023, - "grad_norm": 3.133430242538452, - "learning_rate": 2.4598796536949616e-05, - "loss": 0.5259, - "step": 172400 - }, - { - "epoch": 1.5241606110433352, - "grad_norm": 2.374271869659424, - "learning_rate": 2.4597323149277745e-05, - "loss": 0.6656, - "step": 172410 - }, - { - "epoch": 1.5242490143036476, - "grad_norm": 4.873725891113281, - "learning_rate": 2.4595849761605876e-05, - "loss": 0.5989, - "step": 172420 - }, - { - "epoch": 1.52433741756396, - "grad_norm": 2.8913698196411133, - "learning_rate": 2.4594376373934005e-05, - "loss": 0.6233, - "step": 172430 - }, - { - "epoch": 1.524425820824272, - "grad_norm": 2.974576950073242, - "learning_rate": 2.4592902986262133e-05, - "loss": 0.5089, - "step": 172440 - }, - { - "epoch": 1.5245142240845841, - "grad_norm": 6.621434688568115, - "learning_rate": 2.459142959859026e-05, - "loss": 0.5803, - "step": 172450 - }, - { - "epoch": 1.5246026273448965, - "grad_norm": 1.4491280317306519, - "learning_rate": 2.4589956210918393e-05, - "loss": 0.5787, - "step": 172460 - }, - { - "epoch": 1.5246910306052088, - "grad_norm": 1.1783498525619507, - "learning_rate": 2.458848282324652e-05, - "loss": 0.6971, - "step": 172470 - }, - { - "epoch": 1.524779433865521, - "grad_norm": 1.5873522758483887, - "learning_rate": 2.458700943557465e-05, - "loss": 0.5434, - "step": 172480 - }, - { - "epoch": 1.524867837125833, - "grad_norm": 6.424190044403076, - "learning_rate": 2.458553604790278e-05, - "loss": 0.5754, - "step": 172490 - }, - { - "epoch": 1.5249562403861454, - "grad_norm": 1.5184119939804077, - "learning_rate": 2.458406266023091e-05, - "loss": 0.6341, - "step": 172500 - }, - { - "epoch": 1.5250446436464578, - "grad_norm": 2.2332565784454346, - "learning_rate": 2.458258927255904e-05, - "loss": 0.6545, - "step": 172510 - }, - { - "epoch": 1.5251330469067699, - "grad_norm": 7.326929092407227, - "learning_rate": 2.4581115884887167e-05, - "loss": 0.686, - "step": 172520 - }, - { - "epoch": 1.5252214501670822, - "grad_norm": 2.210966110229492, - "learning_rate": 2.45796424972153e-05, - "loss": 0.5042, - "step": 172530 - }, - { - "epoch": 1.5253098534273946, - "grad_norm": 1.439150094985962, - "learning_rate": 2.4578169109543427e-05, - "loss": 0.6445, - "step": 172540 - }, - { - "epoch": 1.5253982566877067, - "grad_norm": 9.683670997619629, - "learning_rate": 2.4576695721871555e-05, - "loss": 0.5704, - "step": 172550 - }, - { - "epoch": 1.5254866599480188, - "grad_norm": 2.252748489379883, - "learning_rate": 2.4575222334199683e-05, - "loss": 0.6423, - "step": 172560 - }, - { - "epoch": 1.5255750632083311, - "grad_norm": 2.7010691165924072, - "learning_rate": 2.4573748946527815e-05, - "loss": 0.6829, - "step": 172570 - }, - { - "epoch": 1.5256634664686435, - "grad_norm": 4.710685729980469, - "learning_rate": 2.4572275558855944e-05, - "loss": 0.6447, - "step": 172580 - }, - { - "epoch": 1.5257518697289556, - "grad_norm": 0.9347019195556641, - "learning_rate": 2.4570802171184072e-05, - "loss": 0.5261, - "step": 172590 - }, - { - "epoch": 1.5258402729892677, - "grad_norm": 3.8516714572906494, - "learning_rate": 2.4569328783512204e-05, - "loss": 0.5437, - "step": 172600 - }, - { - "epoch": 1.52592867624958, - "grad_norm": 1.5092065334320068, - "learning_rate": 2.4567855395840332e-05, - "loss": 0.6155, - "step": 172610 - }, - { - "epoch": 1.5260170795098924, - "grad_norm": 2.351064443588257, - "learning_rate": 2.456638200816846e-05, - "loss": 0.6252, - "step": 172620 - }, - { - "epoch": 1.5261054827702045, - "grad_norm": 1.4649814367294312, - "learning_rate": 2.4564908620496592e-05, - "loss": 0.4769, - "step": 172630 - }, - { - "epoch": 1.5261938860305166, - "grad_norm": 2.654024600982666, - "learning_rate": 2.456343523282472e-05, - "loss": 0.6006, - "step": 172640 - }, - { - "epoch": 1.5262822892908292, - "grad_norm": 8.104409217834473, - "learning_rate": 2.456196184515285e-05, - "loss": 0.5854, - "step": 172650 - }, - { - "epoch": 1.5263706925511413, - "grad_norm": 6.593908786773682, - "learning_rate": 2.456048845748098e-05, - "loss": 0.5601, - "step": 172660 - }, - { - "epoch": 1.5264590958114534, - "grad_norm": 10.686739921569824, - "learning_rate": 2.455901506980911e-05, - "loss": 0.5823, - "step": 172670 - }, - { - "epoch": 1.5265474990717658, - "grad_norm": 2.0972142219543457, - "learning_rate": 2.4557541682137237e-05, - "loss": 0.5837, - "step": 172680 - }, - { - "epoch": 1.5266359023320781, - "grad_norm": 3.830202102661133, - "learning_rate": 2.455606829446537e-05, - "loss": 0.598, - "step": 172690 - }, - { - "epoch": 1.5267243055923903, - "grad_norm": 1.2510401010513306, - "learning_rate": 2.4554594906793497e-05, - "loss": 0.7634, - "step": 172700 - }, - { - "epoch": 1.5268127088527024, - "grad_norm": 1.7513103485107422, - "learning_rate": 2.4553121519121626e-05, - "loss": 0.5552, - "step": 172710 - }, - { - "epoch": 1.5269011121130147, - "grad_norm": 2.178109884262085, - "learning_rate": 2.4551648131449757e-05, - "loss": 0.624, - "step": 172720 - }, - { - "epoch": 1.526989515373327, - "grad_norm": 1.3737125396728516, - "learning_rate": 2.4550174743777886e-05, - "loss": 0.6883, - "step": 172730 - }, - { - "epoch": 1.5270779186336392, - "grad_norm": 1.8638834953308105, - "learning_rate": 2.4548701356106014e-05, - "loss": 0.4657, - "step": 172740 - }, - { - "epoch": 1.5271663218939513, - "grad_norm": 4.034111976623535, - "learning_rate": 2.4547227968434146e-05, - "loss": 0.6838, - "step": 172750 - }, - { - "epoch": 1.5272547251542636, - "grad_norm": 3.114433526992798, - "learning_rate": 2.4545754580762274e-05, - "loss": 0.6157, - "step": 172760 - }, - { - "epoch": 1.527343128414576, - "grad_norm": 11.8702974319458, - "learning_rate": 2.4544281193090403e-05, - "loss": 0.6267, - "step": 172770 - }, - { - "epoch": 1.527431531674888, - "grad_norm": 1.5491470098495483, - "learning_rate": 2.4542807805418534e-05, - "loss": 0.5999, - "step": 172780 - }, - { - "epoch": 1.5275199349352004, - "grad_norm": 1.8267556428909302, - "learning_rate": 2.4541334417746663e-05, - "loss": 0.6425, - "step": 172790 - }, - { - "epoch": 1.5276083381955128, - "grad_norm": 5.833799362182617, - "learning_rate": 2.453986103007479e-05, - "loss": 0.4546, - "step": 172800 - }, - { - "epoch": 1.527696741455825, - "grad_norm": 4.049919605255127, - "learning_rate": 2.453838764240292e-05, - "loss": 0.6137, - "step": 172810 - }, - { - "epoch": 1.527785144716137, - "grad_norm": 1.2713209390640259, - "learning_rate": 2.453691425473105e-05, - "loss": 0.5319, - "step": 172820 - }, - { - "epoch": 1.5278735479764494, - "grad_norm": 3.581970453262329, - "learning_rate": 2.453544086705918e-05, - "loss": 0.6672, - "step": 172830 - }, - { - "epoch": 1.5279619512367617, - "grad_norm": 1.4777510166168213, - "learning_rate": 2.4533967479387308e-05, - "loss": 0.6279, - "step": 172840 - }, - { - "epoch": 1.5280503544970738, - "grad_norm": 1.2001938819885254, - "learning_rate": 2.453249409171544e-05, - "loss": 0.6069, - "step": 172850 - }, - { - "epoch": 1.528138757757386, - "grad_norm": 1.0515836477279663, - "learning_rate": 2.4531020704043568e-05, - "loss": 0.5836, - "step": 172860 - }, - { - "epoch": 1.5282271610176983, - "grad_norm": 3.4392929077148438, - "learning_rate": 2.4529547316371696e-05, - "loss": 0.5784, - "step": 172870 - }, - { - "epoch": 1.5283155642780106, - "grad_norm": 10.642607688903809, - "learning_rate": 2.4528073928699825e-05, - "loss": 0.6758, - "step": 172880 - }, - { - "epoch": 1.5284039675383227, - "grad_norm": 2.718301773071289, - "learning_rate": 2.4526600541027956e-05, - "loss": 0.5985, - "step": 172890 - }, - { - "epoch": 1.528492370798635, - "grad_norm": 9.04906940460205, - "learning_rate": 2.4525127153356085e-05, - "loss": 0.5806, - "step": 172900 - }, - { - "epoch": 1.5285807740589474, - "grad_norm": 1.8521445989608765, - "learning_rate": 2.4523653765684213e-05, - "loss": 0.509, - "step": 172910 - }, - { - "epoch": 1.5286691773192596, - "grad_norm": 1.8146170377731323, - "learning_rate": 2.452218037801234e-05, - "loss": 0.6793, - "step": 172920 - }, - { - "epoch": 1.5287575805795717, - "grad_norm": 8.418856620788574, - "learning_rate": 2.4520706990340473e-05, - "loss": 0.5273, - "step": 172930 - }, - { - "epoch": 1.528845983839884, - "grad_norm": 5.599218845367432, - "learning_rate": 2.45192336026686e-05, - "loss": 0.5419, - "step": 172940 - }, - { - "epoch": 1.5289343871001964, - "grad_norm": 13.081548690795898, - "learning_rate": 2.451776021499673e-05, - "loss": 0.621, - "step": 172950 - }, - { - "epoch": 1.5290227903605085, - "grad_norm": 1.769519329071045, - "learning_rate": 2.451628682732486e-05, - "loss": 0.545, - "step": 172960 - }, - { - "epoch": 1.5291111936208206, - "grad_norm": 0.8284148573875427, - "learning_rate": 2.451481343965299e-05, - "loss": 0.5714, - "step": 172970 - }, - { - "epoch": 1.529199596881133, - "grad_norm": 3.3695456981658936, - "learning_rate": 2.451334005198112e-05, - "loss": 0.6492, - "step": 172980 - }, - { - "epoch": 1.5292880001414453, - "grad_norm": 12.29538631439209, - "learning_rate": 2.4511866664309247e-05, - "loss": 0.5491, - "step": 172990 - }, - { - "epoch": 1.5293764034017574, - "grad_norm": 1.382818579673767, - "learning_rate": 2.451039327663738e-05, - "loss": 0.7067, - "step": 173000 - }, - { - "epoch": 1.5294648066620697, - "grad_norm": 13.862791061401367, - "learning_rate": 2.4508919888965507e-05, - "loss": 0.6633, - "step": 173010 - }, - { - "epoch": 1.529553209922382, - "grad_norm": 2.1402299404144287, - "learning_rate": 2.4507446501293635e-05, - "loss": 0.6518, - "step": 173020 - }, - { - "epoch": 1.5296416131826942, - "grad_norm": 13.009121894836426, - "learning_rate": 2.4505973113621764e-05, - "loss": 0.7374, - "step": 173030 - }, - { - "epoch": 1.5297300164430063, - "grad_norm": 3.0197532176971436, - "learning_rate": 2.4504499725949895e-05, - "loss": 0.5881, - "step": 173040 - }, - { - "epoch": 1.5298184197033187, - "grad_norm": 3.2528882026672363, - "learning_rate": 2.4503026338278024e-05, - "loss": 0.5221, - "step": 173050 - }, - { - "epoch": 1.529906822963631, - "grad_norm": 6.419832229614258, - "learning_rate": 2.4501552950606152e-05, - "loss": 0.6583, - "step": 173060 - }, - { - "epoch": 1.5299952262239431, - "grad_norm": 14.746634483337402, - "learning_rate": 2.4500079562934284e-05, - "loss": 0.5746, - "step": 173070 - }, - { - "epoch": 1.5300836294842552, - "grad_norm": 1.8239799737930298, - "learning_rate": 2.4498606175262412e-05, - "loss": 0.4518, - "step": 173080 - }, - { - "epoch": 1.5301720327445676, - "grad_norm": 1.7478179931640625, - "learning_rate": 2.449713278759054e-05, - "loss": 0.5104, - "step": 173090 - }, - { - "epoch": 1.53026043600488, - "grad_norm": 2.063530921936035, - "learning_rate": 2.449565939991867e-05, - "loss": 0.5551, - "step": 173100 - }, - { - "epoch": 1.530348839265192, - "grad_norm": 7.434767723083496, - "learning_rate": 2.44941860122468e-05, - "loss": 0.7746, - "step": 173110 - }, - { - "epoch": 1.5304372425255044, - "grad_norm": 5.22447395324707, - "learning_rate": 2.449271262457493e-05, - "loss": 0.6282, - "step": 173120 - }, - { - "epoch": 1.5305256457858167, - "grad_norm": 4.341371536254883, - "learning_rate": 2.4491239236903057e-05, - "loss": 0.6859, - "step": 173130 - }, - { - "epoch": 1.5306140490461289, - "grad_norm": 2.9289324283599854, - "learning_rate": 2.448976584923119e-05, - "loss": 0.6033, - "step": 173140 - }, - { - "epoch": 1.530702452306441, - "grad_norm": 4.917721271514893, - "learning_rate": 2.4488292461559317e-05, - "loss": 0.5989, - "step": 173150 - }, - { - "epoch": 1.5307908555667533, - "grad_norm": 1.901949167251587, - "learning_rate": 2.4486819073887446e-05, - "loss": 0.6076, - "step": 173160 - }, - { - "epoch": 1.5308792588270657, - "grad_norm": 6.93796968460083, - "learning_rate": 2.4485345686215574e-05, - "loss": 0.7159, - "step": 173170 - }, - { - "epoch": 1.5309676620873778, - "grad_norm": 7.3602294921875, - "learning_rate": 2.4483872298543706e-05, - "loss": 0.4781, - "step": 173180 - }, - { - "epoch": 1.53105606534769, - "grad_norm": 2.3484385013580322, - "learning_rate": 2.4482398910871834e-05, - "loss": 0.5808, - "step": 173190 - }, - { - "epoch": 1.5311444686080022, - "grad_norm": 3.399115800857544, - "learning_rate": 2.4480925523199962e-05, - "loss": 0.6417, - "step": 173200 - }, - { - "epoch": 1.5312328718683146, - "grad_norm": 6.387072563171387, - "learning_rate": 2.447945213552809e-05, - "loss": 0.5766, - "step": 173210 - }, - { - "epoch": 1.5313212751286267, - "grad_norm": 6.067452907562256, - "learning_rate": 2.4477978747856223e-05, - "loss": 0.6603, - "step": 173220 - }, - { - "epoch": 1.5314096783889388, - "grad_norm": 4.355941295623779, - "learning_rate": 2.447650536018435e-05, - "loss": 0.6971, - "step": 173230 - }, - { - "epoch": 1.5314980816492514, - "grad_norm": 1.731299877166748, - "learning_rate": 2.447503197251248e-05, - "loss": 0.5693, - "step": 173240 - }, - { - "epoch": 1.5315864849095635, - "grad_norm": 1.1960794925689697, - "learning_rate": 2.447355858484061e-05, - "loss": 0.5911, - "step": 173250 - }, - { - "epoch": 1.5316748881698756, - "grad_norm": 7.097519874572754, - "learning_rate": 2.447208519716874e-05, - "loss": 0.5844, - "step": 173260 - }, - { - "epoch": 1.531763291430188, - "grad_norm": 4.278408050537109, - "learning_rate": 2.4470611809496868e-05, - "loss": 0.5859, - "step": 173270 - }, - { - "epoch": 1.5318516946905003, - "grad_norm": 3.260118007659912, - "learning_rate": 2.4469138421824996e-05, - "loss": 0.5447, - "step": 173280 - }, - { - "epoch": 1.5319400979508124, - "grad_norm": 0.9700481295585632, - "learning_rate": 2.4467665034153128e-05, - "loss": 0.5629, - "step": 173290 - }, - { - "epoch": 1.5320285012111245, - "grad_norm": 6.768033027648926, - "learning_rate": 2.4466191646481256e-05, - "loss": 0.5933, - "step": 173300 - }, - { - "epoch": 1.532116904471437, - "grad_norm": 1.0262489318847656, - "learning_rate": 2.4464718258809385e-05, - "loss": 0.4784, - "step": 173310 - }, - { - "epoch": 1.5322053077317492, - "grad_norm": 2.351008653640747, - "learning_rate": 2.4463244871137516e-05, - "loss": 0.5758, - "step": 173320 - }, - { - "epoch": 1.5322937109920614, - "grad_norm": 2.3888816833496094, - "learning_rate": 2.4461771483465645e-05, - "loss": 0.6582, - "step": 173330 - }, - { - "epoch": 1.5323821142523735, - "grad_norm": 1.2799350023269653, - "learning_rate": 2.4460298095793773e-05, - "loss": 0.5523, - "step": 173340 - }, - { - "epoch": 1.5324705175126858, - "grad_norm": 2.7748446464538574, - "learning_rate": 2.44588247081219e-05, - "loss": 0.5929, - "step": 173350 - }, - { - "epoch": 1.5325589207729982, - "grad_norm": 1.8020994663238525, - "learning_rate": 2.4457351320450033e-05, - "loss": 0.537, - "step": 173360 - }, - { - "epoch": 1.5326473240333103, - "grad_norm": 2.196061611175537, - "learning_rate": 2.445587793277816e-05, - "loss": 0.56, - "step": 173370 - }, - { - "epoch": 1.5327357272936226, - "grad_norm": 2.0812814235687256, - "learning_rate": 2.445440454510629e-05, - "loss": 0.5524, - "step": 173380 - }, - { - "epoch": 1.532824130553935, - "grad_norm": 1.4463287591934204, - "learning_rate": 2.4452931157434418e-05, - "loss": 0.605, - "step": 173390 - }, - { - "epoch": 1.532912533814247, - "grad_norm": 1.5608011484146118, - "learning_rate": 2.445145776976255e-05, - "loss": 0.7594, - "step": 173400 - }, - { - "epoch": 1.5330009370745592, - "grad_norm": 3.173502206802368, - "learning_rate": 2.4449984382090678e-05, - "loss": 0.6403, - "step": 173410 - }, - { - "epoch": 1.5330893403348715, - "grad_norm": 2.8024933338165283, - "learning_rate": 2.4448510994418807e-05, - "loss": 0.6108, - "step": 173420 - }, - { - "epoch": 1.5331777435951839, - "grad_norm": 2.6264004707336426, - "learning_rate": 2.444703760674694e-05, - "loss": 0.5982, - "step": 173430 - }, - { - "epoch": 1.533266146855496, - "grad_norm": 1.6531671285629272, - "learning_rate": 2.4445564219075067e-05, - "loss": 0.6804, - "step": 173440 - }, - { - "epoch": 1.5333545501158081, - "grad_norm": 2.6333839893341064, - "learning_rate": 2.4444090831403195e-05, - "loss": 0.4988, - "step": 173450 - }, - { - "epoch": 1.5334429533761205, - "grad_norm": 2.5326106548309326, - "learning_rate": 2.4442617443731323e-05, - "loss": 0.5212, - "step": 173460 - }, - { - "epoch": 1.5335313566364328, - "grad_norm": 14.80500316619873, - "learning_rate": 2.4441144056059455e-05, - "loss": 0.496, - "step": 173470 - }, - { - "epoch": 1.533619759896745, - "grad_norm": 0.9426552653312683, - "learning_rate": 2.4439670668387584e-05, - "loss": 0.5625, - "step": 173480 - }, - { - "epoch": 1.5337081631570573, - "grad_norm": 1.0415114164352417, - "learning_rate": 2.4438197280715712e-05, - "loss": 0.6049, - "step": 173490 - }, - { - "epoch": 1.5337965664173696, - "grad_norm": 4.935723304748535, - "learning_rate": 2.443672389304384e-05, - "loss": 0.6203, - "step": 173500 - }, - { - "epoch": 1.5338849696776817, - "grad_norm": 10.291781425476074, - "learning_rate": 2.4435250505371972e-05, - "loss": 0.5572, - "step": 173510 - }, - { - "epoch": 1.5339733729379939, - "grad_norm": 4.828888893127441, - "learning_rate": 2.44337771177001e-05, - "loss": 0.532, - "step": 173520 - }, - { - "epoch": 1.5340617761983062, - "grad_norm": 3.1975908279418945, - "learning_rate": 2.443230373002823e-05, - "loss": 0.5609, - "step": 173530 - }, - { - "epoch": 1.5341501794586185, - "grad_norm": 1.3279147148132324, - "learning_rate": 2.443083034235636e-05, - "loss": 0.4884, - "step": 173540 - }, - { - "epoch": 1.5342385827189307, - "grad_norm": 4.276978015899658, - "learning_rate": 2.442935695468449e-05, - "loss": 0.6079, - "step": 173550 - }, - { - "epoch": 1.5343269859792428, - "grad_norm": 8.07887077331543, - "learning_rate": 2.442788356701262e-05, - "loss": 0.5453, - "step": 173560 - }, - { - "epoch": 1.5344153892395551, - "grad_norm": 1.3652163743972778, - "learning_rate": 2.442641017934075e-05, - "loss": 0.5976, - "step": 173570 - }, - { - "epoch": 1.5345037924998675, - "grad_norm": 2.8738296031951904, - "learning_rate": 2.4424936791668877e-05, - "loss": 0.644, - "step": 173580 - }, - { - "epoch": 1.5345921957601796, - "grad_norm": 4.004851341247559, - "learning_rate": 2.442346340399701e-05, - "loss": 0.726, - "step": 173590 - }, - { - "epoch": 1.534680599020492, - "grad_norm": 2.2931952476501465, - "learning_rate": 2.4421990016325137e-05, - "loss": 0.583, - "step": 173600 - }, - { - "epoch": 1.5347690022808043, - "grad_norm": 2.7399141788482666, - "learning_rate": 2.4420516628653266e-05, - "loss": 0.5554, - "step": 173610 - }, - { - "epoch": 1.5348574055411164, - "grad_norm": 3.6382925510406494, - "learning_rate": 2.4419043240981397e-05, - "loss": 0.5901, - "step": 173620 - }, - { - "epoch": 1.5349458088014285, - "grad_norm": 17.632320404052734, - "learning_rate": 2.4417569853309526e-05, - "loss": 0.5753, - "step": 173630 - }, - { - "epoch": 1.5350342120617408, - "grad_norm": 6.246155261993408, - "learning_rate": 2.4416096465637654e-05, - "loss": 0.6553, - "step": 173640 - }, - { - "epoch": 1.5351226153220532, - "grad_norm": 2.1301167011260986, - "learning_rate": 2.4414623077965786e-05, - "loss": 0.6287, - "step": 173650 - }, - { - "epoch": 1.5352110185823653, - "grad_norm": 2.511080741882324, - "learning_rate": 2.4413149690293914e-05, - "loss": 0.5956, - "step": 173660 - }, - { - "epoch": 1.5352994218426774, - "grad_norm": 2.67580509185791, - "learning_rate": 2.4411676302622043e-05, - "loss": 0.5855, - "step": 173670 - }, - { - "epoch": 1.5353878251029898, - "grad_norm": 1.544997215270996, - "learning_rate": 2.441020291495017e-05, - "loss": 0.5898, - "step": 173680 - }, - { - "epoch": 1.535476228363302, - "grad_norm": 4.787408351898193, - "learning_rate": 2.4408729527278303e-05, - "loss": 0.6563, - "step": 173690 - }, - { - "epoch": 1.5355646316236142, - "grad_norm": 2.9431087970733643, - "learning_rate": 2.440725613960643e-05, - "loss": 0.6099, - "step": 173700 - }, - { - "epoch": 1.5356530348839266, - "grad_norm": 2.1662964820861816, - "learning_rate": 2.440578275193456e-05, - "loss": 0.5175, - "step": 173710 - }, - { - "epoch": 1.535741438144239, - "grad_norm": 1.3347502946853638, - "learning_rate": 2.440430936426269e-05, - "loss": 0.5018, - "step": 173720 - }, - { - "epoch": 1.535829841404551, - "grad_norm": 2.976702928543091, - "learning_rate": 2.440283597659082e-05, - "loss": 0.6384, - "step": 173730 - }, - { - "epoch": 1.5359182446648632, - "grad_norm": 2.4031283855438232, - "learning_rate": 2.4401362588918948e-05, - "loss": 0.5441, - "step": 173740 - }, - { - "epoch": 1.5360066479251755, - "grad_norm": 4.3219313621521, - "learning_rate": 2.4399889201247076e-05, - "loss": 0.6106, - "step": 173750 - }, - { - "epoch": 1.5360950511854878, - "grad_norm": 3.153588056564331, - "learning_rate": 2.4398415813575208e-05, - "loss": 0.6213, - "step": 173760 - }, - { - "epoch": 1.5361834544458, - "grad_norm": 1.836745023727417, - "learning_rate": 2.4396942425903336e-05, - "loss": 0.5739, - "step": 173770 - }, - { - "epoch": 1.536271857706112, - "grad_norm": 1.1322550773620605, - "learning_rate": 2.4395469038231465e-05, - "loss": 0.5097, - "step": 173780 - }, - { - "epoch": 1.5363602609664244, - "grad_norm": 3.6168644428253174, - "learning_rate": 2.4393995650559596e-05, - "loss": 0.7178, - "step": 173790 - }, - { - "epoch": 1.5364486642267368, - "grad_norm": 2.928410291671753, - "learning_rate": 2.4392522262887725e-05, - "loss": 0.5653, - "step": 173800 - }, - { - "epoch": 1.5365370674870489, - "grad_norm": 1.3587273359298706, - "learning_rate": 2.4391048875215853e-05, - "loss": 0.511, - "step": 173810 - }, - { - "epoch": 1.536625470747361, - "grad_norm": 4.807299613952637, - "learning_rate": 2.438957548754398e-05, - "loss": 0.5686, - "step": 173820 - }, - { - "epoch": 1.5367138740076736, - "grad_norm": 2.6591005325317383, - "learning_rate": 2.4388102099872113e-05, - "loss": 0.62, - "step": 173830 - }, - { - "epoch": 1.5368022772679857, - "grad_norm": 1.6205626726150513, - "learning_rate": 2.438662871220024e-05, - "loss": 0.5527, - "step": 173840 - }, - { - "epoch": 1.5368906805282978, - "grad_norm": 1.0479035377502441, - "learning_rate": 2.438515532452837e-05, - "loss": 0.7163, - "step": 173850 - }, - { - "epoch": 1.5369790837886101, - "grad_norm": 4.933063507080078, - "learning_rate": 2.4383681936856498e-05, - "loss": 0.6411, - "step": 173860 - }, - { - "epoch": 1.5370674870489225, - "grad_norm": 1.8709900379180908, - "learning_rate": 2.438220854918463e-05, - "loss": 0.5911, - "step": 173870 - }, - { - "epoch": 1.5371558903092346, - "grad_norm": 1.4639195203781128, - "learning_rate": 2.438073516151276e-05, - "loss": 0.6079, - "step": 173880 - }, - { - "epoch": 1.5372442935695467, - "grad_norm": 2.9190611839294434, - "learning_rate": 2.4379261773840887e-05, - "loss": 0.5513, - "step": 173890 - }, - { - "epoch": 1.537332696829859, - "grad_norm": 2.1594042778015137, - "learning_rate": 2.437778838616902e-05, - "loss": 0.6593, - "step": 173900 - }, - { - "epoch": 1.5374211000901714, - "grad_norm": 3.106978416442871, - "learning_rate": 2.4376314998497147e-05, - "loss": 0.5475, - "step": 173910 - }, - { - "epoch": 1.5375095033504835, - "grad_norm": 8.02630615234375, - "learning_rate": 2.4374841610825275e-05, - "loss": 0.7161, - "step": 173920 - }, - { - "epoch": 1.5375979066107957, - "grad_norm": 1.470693826675415, - "learning_rate": 2.4373368223153403e-05, - "loss": 0.5072, - "step": 173930 - }, - { - "epoch": 1.537686309871108, - "grad_norm": 1.2773759365081787, - "learning_rate": 2.4371894835481535e-05, - "loss": 0.5743, - "step": 173940 - }, - { - "epoch": 1.5377747131314203, - "grad_norm": 1.7460817098617554, - "learning_rate": 2.4370421447809664e-05, - "loss": 0.4711, - "step": 173950 - }, - { - "epoch": 1.5378631163917325, - "grad_norm": 2.5927224159240723, - "learning_rate": 2.4368948060137792e-05, - "loss": 0.7205, - "step": 173960 - }, - { - "epoch": 1.5379515196520448, - "grad_norm": 1.0669831037521362, - "learning_rate": 2.4367474672465924e-05, - "loss": 0.5475, - "step": 173970 - }, - { - "epoch": 1.5380399229123571, - "grad_norm": 1.3232972621917725, - "learning_rate": 2.4366001284794052e-05, - "loss": 0.6209, - "step": 173980 - }, - { - "epoch": 1.5381283261726693, - "grad_norm": 2.118748188018799, - "learning_rate": 2.436452789712218e-05, - "loss": 0.6935, - "step": 173990 - }, - { - "epoch": 1.5382167294329814, - "grad_norm": 3.4111011028289795, - "learning_rate": 2.436305450945031e-05, - "loss": 0.6815, - "step": 174000 - }, - { - "epoch": 1.5383051326932937, - "grad_norm": 1.8020133972167969, - "learning_rate": 2.436158112177844e-05, - "loss": 0.7691, - "step": 174010 - }, - { - "epoch": 1.538393535953606, - "grad_norm": 2.221857786178589, - "learning_rate": 2.436010773410657e-05, - "loss": 0.4535, - "step": 174020 - }, - { - "epoch": 1.5384819392139182, - "grad_norm": 1.7068196535110474, - "learning_rate": 2.4358634346434697e-05, - "loss": 0.552, - "step": 174030 - }, - { - "epoch": 1.5385703424742303, - "grad_norm": 3.848716974258423, - "learning_rate": 2.4357160958762826e-05, - "loss": 0.4983, - "step": 174040 - }, - { - "epoch": 1.5386587457345426, - "grad_norm": 1.0871692895889282, - "learning_rate": 2.4355687571090957e-05, - "loss": 0.5567, - "step": 174050 - }, - { - "epoch": 1.538747148994855, - "grad_norm": 3.127941131591797, - "learning_rate": 2.4354214183419086e-05, - "loss": 0.582, - "step": 174060 - }, - { - "epoch": 1.538835552255167, - "grad_norm": 2.89378023147583, - "learning_rate": 2.4352740795747214e-05, - "loss": 0.5841, - "step": 174070 - }, - { - "epoch": 1.5389239555154794, - "grad_norm": 2.551440715789795, - "learning_rate": 2.4351267408075346e-05, - "loss": 0.5411, - "step": 174080 - }, - { - "epoch": 1.5390123587757918, - "grad_norm": 2.0462491512298584, - "learning_rate": 2.4349794020403474e-05, - "loss": 0.7938, - "step": 174090 - }, - { - "epoch": 1.539100762036104, - "grad_norm": 1.5742062330245972, - "learning_rate": 2.4348320632731602e-05, - "loss": 0.7603, - "step": 174100 - }, - { - "epoch": 1.539189165296416, - "grad_norm": 1.2905710935592651, - "learning_rate": 2.434684724505973e-05, - "loss": 0.7115, - "step": 174110 - }, - { - "epoch": 1.5392775685567284, - "grad_norm": 1.3261444568634033, - "learning_rate": 2.4345373857387863e-05, - "loss": 0.6477, - "step": 174120 - }, - { - "epoch": 1.5393659718170407, - "grad_norm": 1.732905387878418, - "learning_rate": 2.434390046971599e-05, - "loss": 0.6756, - "step": 174130 - }, - { - "epoch": 1.5394543750773528, - "grad_norm": 1.3431121110916138, - "learning_rate": 2.434242708204412e-05, - "loss": 0.4962, - "step": 174140 - }, - { - "epoch": 1.539542778337665, - "grad_norm": 1.2139936685562134, - "learning_rate": 2.4340953694372248e-05, - "loss": 0.7039, - "step": 174150 - }, - { - "epoch": 1.5396311815979773, - "grad_norm": 3.871922016143799, - "learning_rate": 2.433948030670038e-05, - "loss": 0.5405, - "step": 174160 - }, - { - "epoch": 1.5397195848582896, - "grad_norm": 16.565261840820312, - "learning_rate": 2.4338006919028508e-05, - "loss": 0.5995, - "step": 174170 - }, - { - "epoch": 1.5398079881186018, - "grad_norm": 4.501935958862305, - "learning_rate": 2.4336533531356636e-05, - "loss": 0.5628, - "step": 174180 - }, - { - "epoch": 1.539896391378914, - "grad_norm": 10.195740699768066, - "learning_rate": 2.4335060143684768e-05, - "loss": 0.491, - "step": 174190 - }, - { - "epoch": 1.5399847946392264, - "grad_norm": 1.7179523706436157, - "learning_rate": 2.4333586756012896e-05, - "loss": 0.5862, - "step": 174200 - }, - { - "epoch": 1.5400731978995386, - "grad_norm": 3.060002326965332, - "learning_rate": 2.4332113368341025e-05, - "loss": 0.5974, - "step": 174210 - }, - { - "epoch": 1.5401616011598507, - "grad_norm": 4.0132670402526855, - "learning_rate": 2.4330639980669153e-05, - "loss": 0.593, - "step": 174220 - }, - { - "epoch": 1.540250004420163, - "grad_norm": 27.567434310913086, - "learning_rate": 2.4329166592997285e-05, - "loss": 0.682, - "step": 174230 - }, - { - "epoch": 1.5403384076804754, - "grad_norm": 11.265193939208984, - "learning_rate": 2.4327693205325413e-05, - "loss": 0.4424, - "step": 174240 - }, - { - "epoch": 1.5404268109407875, - "grad_norm": 2.146970272064209, - "learning_rate": 2.432621981765354e-05, - "loss": 0.5641, - "step": 174250 - }, - { - "epoch": 1.5405152142010996, - "grad_norm": 3.656827688217163, - "learning_rate": 2.4324746429981673e-05, - "loss": 0.5437, - "step": 174260 - }, - { - "epoch": 1.540603617461412, - "grad_norm": 7.898015022277832, - "learning_rate": 2.43232730423098e-05, - "loss": 0.5011, - "step": 174270 - }, - { - "epoch": 1.5406920207217243, - "grad_norm": 2.4155168533325195, - "learning_rate": 2.432179965463793e-05, - "loss": 0.6331, - "step": 174280 - }, - { - "epoch": 1.5407804239820364, - "grad_norm": 4.174191951751709, - "learning_rate": 2.4320326266966058e-05, - "loss": 0.5373, - "step": 174290 - }, - { - "epoch": 1.5408688272423487, - "grad_norm": 3.2757620811462402, - "learning_rate": 2.431885287929419e-05, - "loss": 0.6991, - "step": 174300 - }, - { - "epoch": 1.540957230502661, - "grad_norm": 8.390728950500488, - "learning_rate": 2.4317379491622318e-05, - "loss": 0.6275, - "step": 174310 - }, - { - "epoch": 1.5410456337629732, - "grad_norm": 1.6958503723144531, - "learning_rate": 2.4315906103950447e-05, - "loss": 0.4597, - "step": 174320 - }, - { - "epoch": 1.5411340370232853, - "grad_norm": 6.810363292694092, - "learning_rate": 2.4314432716278575e-05, - "loss": 0.6064, - "step": 174330 - }, - { - "epoch": 1.5412224402835977, - "grad_norm": 2.7560408115386963, - "learning_rate": 2.4312959328606707e-05, - "loss": 0.5504, - "step": 174340 - }, - { - "epoch": 1.54131084354391, - "grad_norm": 2.6676573753356934, - "learning_rate": 2.4311485940934835e-05, - "loss": 0.5673, - "step": 174350 - }, - { - "epoch": 1.5413992468042221, - "grad_norm": 2.806307077407837, - "learning_rate": 2.4310012553262963e-05, - "loss": 0.6896, - "step": 174360 - }, - { - "epoch": 1.5414876500645343, - "grad_norm": 4.78728723526001, - "learning_rate": 2.4308539165591095e-05, - "loss": 0.7291, - "step": 174370 - }, - { - "epoch": 1.5415760533248466, - "grad_norm": 0.6354554891586304, - "learning_rate": 2.4307065777919223e-05, - "loss": 0.4028, - "step": 174380 - }, - { - "epoch": 1.541664456585159, - "grad_norm": 2.0700461864471436, - "learning_rate": 2.4305592390247352e-05, - "loss": 0.7117, - "step": 174390 - }, - { - "epoch": 1.541752859845471, - "grad_norm": 5.30848503112793, - "learning_rate": 2.430411900257548e-05, - "loss": 0.6401, - "step": 174400 - }, - { - "epoch": 1.5418412631057832, - "grad_norm": 1.5651367902755737, - "learning_rate": 2.4302645614903612e-05, - "loss": 0.6562, - "step": 174410 - }, - { - "epoch": 1.5419296663660957, - "grad_norm": 14.498424530029297, - "learning_rate": 2.430117222723174e-05, - "loss": 0.6783, - "step": 174420 - }, - { - "epoch": 1.5420180696264079, - "grad_norm": 2.9781339168548584, - "learning_rate": 2.429969883955987e-05, - "loss": 0.4447, - "step": 174430 - }, - { - "epoch": 1.54210647288672, - "grad_norm": 2.2297732830047607, - "learning_rate": 2.4298225451888e-05, - "loss": 0.6672, - "step": 174440 - }, - { - "epoch": 1.5421948761470323, - "grad_norm": 6.882424354553223, - "learning_rate": 2.429675206421613e-05, - "loss": 0.6849, - "step": 174450 - }, - { - "epoch": 1.5422832794073447, - "grad_norm": 4.663057327270508, - "learning_rate": 2.4295278676544257e-05, - "loss": 0.7398, - "step": 174460 - }, - { - "epoch": 1.5423716826676568, - "grad_norm": 1.9308974742889404, - "learning_rate": 2.429380528887239e-05, - "loss": 0.5704, - "step": 174470 - }, - { - "epoch": 1.542460085927969, - "grad_norm": 3.26165771484375, - "learning_rate": 2.4292331901200517e-05, - "loss": 0.6808, - "step": 174480 - }, - { - "epoch": 1.5425484891882812, - "grad_norm": 2.3461320400238037, - "learning_rate": 2.4290858513528646e-05, - "loss": 0.5786, - "step": 174490 - }, - { - "epoch": 1.5426368924485936, - "grad_norm": 1.0739569664001465, - "learning_rate": 2.4289385125856777e-05, - "loss": 0.5507, - "step": 174500 - }, - { - "epoch": 1.5427252957089057, - "grad_norm": 1.6214160919189453, - "learning_rate": 2.4287911738184906e-05, - "loss": 0.5693, - "step": 174510 - }, - { - "epoch": 1.5428136989692178, - "grad_norm": 2.236431121826172, - "learning_rate": 2.4286438350513034e-05, - "loss": 0.6735, - "step": 174520 - }, - { - "epoch": 1.5429021022295302, - "grad_norm": 1.9672120809555054, - "learning_rate": 2.4284964962841166e-05, - "loss": 0.5716, - "step": 174530 - }, - { - "epoch": 1.5429905054898425, - "grad_norm": 2.1955885887145996, - "learning_rate": 2.4283491575169294e-05, - "loss": 0.5009, - "step": 174540 - }, - { - "epoch": 1.5430789087501546, - "grad_norm": 3.2552270889282227, - "learning_rate": 2.4282018187497422e-05, - "loss": 0.5852, - "step": 174550 - }, - { - "epoch": 1.543167312010467, - "grad_norm": 4.058462142944336, - "learning_rate": 2.4280544799825554e-05, - "loss": 0.668, - "step": 174560 - }, - { - "epoch": 1.5432557152707793, - "grad_norm": 8.593563079833984, - "learning_rate": 2.4279071412153683e-05, - "loss": 0.6956, - "step": 174570 - }, - { - "epoch": 1.5433441185310914, - "grad_norm": 3.849159002304077, - "learning_rate": 2.427759802448181e-05, - "loss": 0.5241, - "step": 174580 - }, - { - "epoch": 1.5434325217914036, - "grad_norm": 2.745166301727295, - "learning_rate": 2.4276124636809943e-05, - "loss": 0.5873, - "step": 174590 - }, - { - "epoch": 1.543520925051716, - "grad_norm": 2.396702527999878, - "learning_rate": 2.427465124913807e-05, - "loss": 0.5339, - "step": 174600 - }, - { - "epoch": 1.5436093283120282, - "grad_norm": 4.112366676330566, - "learning_rate": 2.42731778614662e-05, - "loss": 0.5718, - "step": 174610 - }, - { - "epoch": 1.5436977315723404, - "grad_norm": 2.43721342086792, - "learning_rate": 2.4271704473794328e-05, - "loss": 0.5891, - "step": 174620 - }, - { - "epoch": 1.5437861348326525, - "grad_norm": 11.213445663452148, - "learning_rate": 2.427023108612246e-05, - "loss": 0.5832, - "step": 174630 - }, - { - "epoch": 1.5438745380929648, - "grad_norm": 7.7145490646362305, - "learning_rate": 2.4268757698450588e-05, - "loss": 0.7186, - "step": 174640 - }, - { - "epoch": 1.5439629413532772, - "grad_norm": 2.1213717460632324, - "learning_rate": 2.4267284310778716e-05, - "loss": 0.6147, - "step": 174650 - }, - { - "epoch": 1.5440513446135893, - "grad_norm": 4.611488342285156, - "learning_rate": 2.4265810923106848e-05, - "loss": 0.6268, - "step": 174660 - }, - { - "epoch": 1.5441397478739016, - "grad_norm": 3.4685707092285156, - "learning_rate": 2.4264337535434976e-05, - "loss": 0.7543, - "step": 174670 - }, - { - "epoch": 1.544228151134214, - "grad_norm": 1.8858023881912231, - "learning_rate": 2.4262864147763105e-05, - "loss": 0.6068, - "step": 174680 - }, - { - "epoch": 1.544316554394526, - "grad_norm": 1.863261342048645, - "learning_rate": 2.4261390760091233e-05, - "loss": 0.5625, - "step": 174690 - }, - { - "epoch": 1.5444049576548382, - "grad_norm": 1.830488681793213, - "learning_rate": 2.4259917372419365e-05, - "loss": 0.5935, - "step": 174700 - }, - { - "epoch": 1.5444933609151505, - "grad_norm": 17.094575881958008, - "learning_rate": 2.4258443984747493e-05, - "loss": 0.5845, - "step": 174710 - }, - { - "epoch": 1.544581764175463, - "grad_norm": 3.5949769020080566, - "learning_rate": 2.425697059707562e-05, - "loss": 0.5959, - "step": 174720 - }, - { - "epoch": 1.544670167435775, - "grad_norm": 3.0940229892730713, - "learning_rate": 2.4255497209403753e-05, - "loss": 0.6586, - "step": 174730 - }, - { - "epoch": 1.5447585706960871, - "grad_norm": 1.356379508972168, - "learning_rate": 2.425402382173188e-05, - "loss": 0.6374, - "step": 174740 - }, - { - "epoch": 1.5448469739563995, - "grad_norm": 6.310287952423096, - "learning_rate": 2.425255043406001e-05, - "loss": 0.6854, - "step": 174750 - }, - { - "epoch": 1.5449353772167118, - "grad_norm": 5.824239730834961, - "learning_rate": 2.4251077046388138e-05, - "loss": 0.7386, - "step": 174760 - }, - { - "epoch": 1.545023780477024, - "grad_norm": 2.230116844177246, - "learning_rate": 2.424960365871627e-05, - "loss": 0.4884, - "step": 174770 - }, - { - "epoch": 1.5451121837373363, - "grad_norm": 1.2905182838439941, - "learning_rate": 2.4248130271044398e-05, - "loss": 0.5113, - "step": 174780 - }, - { - "epoch": 1.5452005869976486, - "grad_norm": 2.199789524078369, - "learning_rate": 2.4246656883372527e-05, - "loss": 0.5855, - "step": 174790 - }, - { - "epoch": 1.5452889902579607, - "grad_norm": 2.3680264949798584, - "learning_rate": 2.4245183495700655e-05, - "loss": 0.5911, - "step": 174800 - }, - { - "epoch": 1.5453773935182729, - "grad_norm": 1.7917861938476562, - "learning_rate": 2.4243710108028787e-05, - "loss": 0.6001, - "step": 174810 - }, - { - "epoch": 1.5454657967785852, - "grad_norm": 1.727258324623108, - "learning_rate": 2.4242236720356915e-05, - "loss": 0.6476, - "step": 174820 - }, - { - "epoch": 1.5455542000388975, - "grad_norm": 1.3490941524505615, - "learning_rate": 2.4240763332685043e-05, - "loss": 0.5915, - "step": 174830 - }, - { - "epoch": 1.5456426032992097, - "grad_norm": 1.7392785549163818, - "learning_rate": 2.4239289945013175e-05, - "loss": 0.5682, - "step": 174840 - }, - { - "epoch": 1.5457310065595218, - "grad_norm": 2.2393693923950195, - "learning_rate": 2.4237816557341304e-05, - "loss": 0.55, - "step": 174850 - }, - { - "epoch": 1.5458194098198341, - "grad_norm": 14.635035514831543, - "learning_rate": 2.4236343169669432e-05, - "loss": 0.5477, - "step": 174860 - }, - { - "epoch": 1.5459078130801465, - "grad_norm": 2.3071987628936768, - "learning_rate": 2.423486978199756e-05, - "loss": 0.7754, - "step": 174870 - }, - { - "epoch": 1.5459962163404586, - "grad_norm": 1.2465306520462036, - "learning_rate": 2.4233396394325692e-05, - "loss": 0.6085, - "step": 174880 - }, - { - "epoch": 1.546084619600771, - "grad_norm": 1.6105276346206665, - "learning_rate": 2.423192300665382e-05, - "loss": 0.694, - "step": 174890 - }, - { - "epoch": 1.5461730228610833, - "grad_norm": 0.9031888246536255, - "learning_rate": 2.423044961898195e-05, - "loss": 0.6845, - "step": 174900 - }, - { - "epoch": 1.5462614261213954, - "grad_norm": 1.2699271440505981, - "learning_rate": 2.422897623131008e-05, - "loss": 0.5156, - "step": 174910 - }, - { - "epoch": 1.5463498293817075, - "grad_norm": 3.84972882270813, - "learning_rate": 2.422750284363821e-05, - "loss": 0.6114, - "step": 174920 - }, - { - "epoch": 1.5464382326420198, - "grad_norm": 2.710536003112793, - "learning_rate": 2.4226029455966337e-05, - "loss": 0.6652, - "step": 174930 - }, - { - "epoch": 1.5465266359023322, - "grad_norm": 1.5496443510055542, - "learning_rate": 2.4224556068294465e-05, - "loss": 0.5837, - "step": 174940 - }, - { - "epoch": 1.5466150391626443, - "grad_norm": 4.660191535949707, - "learning_rate": 2.4223082680622597e-05, - "loss": 0.5843, - "step": 174950 - }, - { - "epoch": 1.5467034424229564, - "grad_norm": 4.089006423950195, - "learning_rate": 2.4221609292950726e-05, - "loss": 0.5533, - "step": 174960 - }, - { - "epoch": 1.5467918456832688, - "grad_norm": 5.7805256843566895, - "learning_rate": 2.4220135905278854e-05, - "loss": 0.5759, - "step": 174970 - }, - { - "epoch": 1.5468802489435811, - "grad_norm": 1.7269043922424316, - "learning_rate": 2.4218662517606982e-05, - "loss": 0.5542, - "step": 174980 - }, - { - "epoch": 1.5469686522038932, - "grad_norm": 3.431790828704834, - "learning_rate": 2.4217189129935114e-05, - "loss": 0.659, - "step": 174990 - }, - { - "epoch": 1.5470570554642054, - "grad_norm": 1.9091544151306152, - "learning_rate": 2.4215715742263242e-05, - "loss": 0.6308, - "step": 175000 - }, - { - "epoch": 1.547145458724518, - "grad_norm": 1.4534193277359009, - "learning_rate": 2.421424235459137e-05, - "loss": 0.5355, - "step": 175010 - }, - { - "epoch": 1.54723386198483, - "grad_norm": 2.6281485557556152, - "learning_rate": 2.4212768966919502e-05, - "loss": 0.6187, - "step": 175020 - }, - { - "epoch": 1.5473222652451422, - "grad_norm": 1.8663088083267212, - "learning_rate": 2.421129557924763e-05, - "loss": 0.7375, - "step": 175030 - }, - { - "epoch": 1.5474106685054545, - "grad_norm": 6.004947662353516, - "learning_rate": 2.420982219157576e-05, - "loss": 0.6483, - "step": 175040 - }, - { - "epoch": 1.5474990717657668, - "grad_norm": 3.082247018814087, - "learning_rate": 2.4208348803903888e-05, - "loss": 0.6405, - "step": 175050 - }, - { - "epoch": 1.547587475026079, - "grad_norm": 1.9165914058685303, - "learning_rate": 2.420687541623202e-05, - "loss": 0.4826, - "step": 175060 - }, - { - "epoch": 1.547675878286391, - "grad_norm": 4.244795322418213, - "learning_rate": 2.4205402028560148e-05, - "loss": 0.6104, - "step": 175070 - }, - { - "epoch": 1.5477642815467034, - "grad_norm": 5.788334846496582, - "learning_rate": 2.4203928640888276e-05, - "loss": 0.5436, - "step": 175080 - }, - { - "epoch": 1.5478526848070158, - "grad_norm": 1.8514665365219116, - "learning_rate": 2.4202455253216408e-05, - "loss": 0.6689, - "step": 175090 - }, - { - "epoch": 1.5479410880673279, - "grad_norm": 5.626691818237305, - "learning_rate": 2.4200981865544536e-05, - "loss": 0.6319, - "step": 175100 - }, - { - "epoch": 1.54802949132764, - "grad_norm": 4.971273899078369, - "learning_rate": 2.4199508477872664e-05, - "loss": 0.5258, - "step": 175110 - }, - { - "epoch": 1.5481178945879526, - "grad_norm": 4.934142112731934, - "learning_rate": 2.4198035090200793e-05, - "loss": 0.4408, - "step": 175120 - }, - { - "epoch": 1.5482062978482647, - "grad_norm": 14.0502347946167, - "learning_rate": 2.4196561702528925e-05, - "loss": 0.4426, - "step": 175130 - }, - { - "epoch": 1.5482947011085768, - "grad_norm": 2.326627016067505, - "learning_rate": 2.4195088314857053e-05, - "loss": 0.6075, - "step": 175140 - }, - { - "epoch": 1.5483831043688892, - "grad_norm": 2.2757251262664795, - "learning_rate": 2.419361492718518e-05, - "loss": 0.6883, - "step": 175150 - }, - { - "epoch": 1.5484715076292015, - "grad_norm": 2.406067371368408, - "learning_rate": 2.419214153951331e-05, - "loss": 0.6454, - "step": 175160 - }, - { - "epoch": 1.5485599108895136, - "grad_norm": 2.498495578765869, - "learning_rate": 2.419066815184144e-05, - "loss": 0.5722, - "step": 175170 - }, - { - "epoch": 1.5486483141498257, - "grad_norm": 3.516601800918579, - "learning_rate": 2.418919476416957e-05, - "loss": 0.5675, - "step": 175180 - }, - { - "epoch": 1.548736717410138, - "grad_norm": 1.4392104148864746, - "learning_rate": 2.4187721376497698e-05, - "loss": 0.5271, - "step": 175190 - }, - { - "epoch": 1.5488251206704504, - "grad_norm": 3.1388044357299805, - "learning_rate": 2.418624798882583e-05, - "loss": 0.6841, - "step": 175200 - }, - { - "epoch": 1.5489135239307625, - "grad_norm": 4.868514060974121, - "learning_rate": 2.4184774601153958e-05, - "loss": 0.5321, - "step": 175210 - }, - { - "epoch": 1.5490019271910747, - "grad_norm": 4.692134380340576, - "learning_rate": 2.4183301213482087e-05, - "loss": 0.6547, - "step": 175220 - }, - { - "epoch": 1.549090330451387, - "grad_norm": 1.6500064134597778, - "learning_rate": 2.4181827825810215e-05, - "loss": 0.6132, - "step": 175230 - }, - { - "epoch": 1.5491787337116993, - "grad_norm": 2.7626256942749023, - "learning_rate": 2.4180354438138347e-05, - "loss": 0.4775, - "step": 175240 - }, - { - "epoch": 1.5492671369720115, - "grad_norm": 1.8304368257522583, - "learning_rate": 2.4178881050466475e-05, - "loss": 0.7541, - "step": 175250 - }, - { - "epoch": 1.5493555402323238, - "grad_norm": 8.355866432189941, - "learning_rate": 2.4177407662794603e-05, - "loss": 0.7308, - "step": 175260 - }, - { - "epoch": 1.5494439434926361, - "grad_norm": 1.6950831413269043, - "learning_rate": 2.417593427512273e-05, - "loss": 0.5514, - "step": 175270 - }, - { - "epoch": 1.5495323467529483, - "grad_norm": 0.6586547493934631, - "learning_rate": 2.4174460887450863e-05, - "loss": 0.5595, - "step": 175280 - }, - { - "epoch": 1.5496207500132604, - "grad_norm": 9.266799926757812, - "learning_rate": 2.4172987499778992e-05, - "loss": 0.4993, - "step": 175290 - }, - { - "epoch": 1.5497091532735727, - "grad_norm": 1.9516414403915405, - "learning_rate": 2.417151411210712e-05, - "loss": 0.6373, - "step": 175300 - }, - { - "epoch": 1.549797556533885, - "grad_norm": 3.445497512817383, - "learning_rate": 2.4170040724435252e-05, - "loss": 0.6679, - "step": 175310 - }, - { - "epoch": 1.5498859597941972, - "grad_norm": 2.7462542057037354, - "learning_rate": 2.416856733676338e-05, - "loss": 0.4786, - "step": 175320 - }, - { - "epoch": 1.5499743630545093, - "grad_norm": 4.17148494720459, - "learning_rate": 2.416709394909151e-05, - "loss": 0.6029, - "step": 175330 - }, - { - "epoch": 1.5500627663148216, - "grad_norm": 1.4304940700531006, - "learning_rate": 2.4165620561419637e-05, - "loss": 0.466, - "step": 175340 - }, - { - "epoch": 1.550151169575134, - "grad_norm": 2.634665012359619, - "learning_rate": 2.416414717374777e-05, - "loss": 0.6591, - "step": 175350 - }, - { - "epoch": 1.5502395728354461, - "grad_norm": 6.376070499420166, - "learning_rate": 2.4162673786075897e-05, - "loss": 0.6488, - "step": 175360 - }, - { - "epoch": 1.5503279760957585, - "grad_norm": 2.5970404148101807, - "learning_rate": 2.4161200398404025e-05, - "loss": 0.5881, - "step": 175370 - }, - { - "epoch": 1.5504163793560708, - "grad_norm": 3.7440390586853027, - "learning_rate": 2.4159727010732157e-05, - "loss": 0.5171, - "step": 175380 - }, - { - "epoch": 1.550504782616383, - "grad_norm": 4.337168216705322, - "learning_rate": 2.4158253623060285e-05, - "loss": 0.5246, - "step": 175390 - }, - { - "epoch": 1.550593185876695, - "grad_norm": 1.1312719583511353, - "learning_rate": 2.4156780235388414e-05, - "loss": 0.5638, - "step": 175400 - }, - { - "epoch": 1.5506815891370074, - "grad_norm": 2.632840871810913, - "learning_rate": 2.4155306847716546e-05, - "loss": 0.5473, - "step": 175410 - }, - { - "epoch": 1.5507699923973197, - "grad_norm": 1.6288197040557861, - "learning_rate": 2.4153833460044674e-05, - "loss": 0.6525, - "step": 175420 - }, - { - "epoch": 1.5508583956576318, - "grad_norm": 6.673985481262207, - "learning_rate": 2.4152360072372802e-05, - "loss": 0.5225, - "step": 175430 - }, - { - "epoch": 1.550946798917944, - "grad_norm": 3.8512940406799316, - "learning_rate": 2.4150886684700934e-05, - "loss": 0.5995, - "step": 175440 - }, - { - "epoch": 1.5510352021782563, - "grad_norm": 2.3673620223999023, - "learning_rate": 2.4149413297029062e-05, - "loss": 0.5624, - "step": 175450 - }, - { - "epoch": 1.5511236054385686, - "grad_norm": 3.6392786502838135, - "learning_rate": 2.414793990935719e-05, - "loss": 0.4961, - "step": 175460 - }, - { - "epoch": 1.5512120086988808, - "grad_norm": 3.440269947052002, - "learning_rate": 2.4146466521685322e-05, - "loss": 0.5754, - "step": 175470 - }, - { - "epoch": 1.551300411959193, - "grad_norm": 1.4280959367752075, - "learning_rate": 2.414499313401345e-05, - "loss": 0.6204, - "step": 175480 - }, - { - "epoch": 1.5513888152195054, - "grad_norm": 1.9678702354431152, - "learning_rate": 2.414351974634158e-05, - "loss": 0.5157, - "step": 175490 - }, - { - "epoch": 1.5514772184798176, - "grad_norm": 8.589799880981445, - "learning_rate": 2.414204635866971e-05, - "loss": 0.6506, - "step": 175500 - }, - { - "epoch": 1.5515656217401297, - "grad_norm": 1.5467242002487183, - "learning_rate": 2.414057297099784e-05, - "loss": 0.5876, - "step": 175510 - }, - { - "epoch": 1.551654025000442, - "grad_norm": 6.826015472412109, - "learning_rate": 2.4139099583325968e-05, - "loss": 0.5367, - "step": 175520 - }, - { - "epoch": 1.5517424282607544, - "grad_norm": 2.0968873500823975, - "learning_rate": 2.41376261956541e-05, - "loss": 0.5819, - "step": 175530 - }, - { - "epoch": 1.5518308315210665, - "grad_norm": 1.9116806983947754, - "learning_rate": 2.4136152807982228e-05, - "loss": 0.7346, - "step": 175540 - }, - { - "epoch": 1.5519192347813786, - "grad_norm": 2.7878074645996094, - "learning_rate": 2.4134679420310356e-05, - "loss": 0.6796, - "step": 175550 - }, - { - "epoch": 1.552007638041691, - "grad_norm": 5.731505393981934, - "learning_rate": 2.4133206032638488e-05, - "loss": 0.5371, - "step": 175560 - }, - { - "epoch": 1.5520960413020033, - "grad_norm": 2.0684056282043457, - "learning_rate": 2.4131732644966616e-05, - "loss": 0.555, - "step": 175570 - }, - { - "epoch": 1.5521844445623154, - "grad_norm": 7.5566487312316895, - "learning_rate": 2.4130259257294745e-05, - "loss": 0.6837, - "step": 175580 - }, - { - "epoch": 1.5522728478226275, - "grad_norm": 3.572631597518921, - "learning_rate": 2.4128785869622873e-05, - "loss": 0.5693, - "step": 175590 - }, - { - "epoch": 1.55236125108294, - "grad_norm": 2.8073689937591553, - "learning_rate": 2.4127312481951005e-05, - "loss": 0.6157, - "step": 175600 - }, - { - "epoch": 1.5524496543432522, - "grad_norm": 1.7215214967727661, - "learning_rate": 2.4125839094279133e-05, - "loss": 0.6523, - "step": 175610 - }, - { - "epoch": 1.5525380576035643, - "grad_norm": 4.865016460418701, - "learning_rate": 2.412436570660726e-05, - "loss": 0.5473, - "step": 175620 - }, - { - "epoch": 1.5526264608638767, - "grad_norm": 1.9606802463531494, - "learning_rate": 2.412289231893539e-05, - "loss": 0.6233, - "step": 175630 - }, - { - "epoch": 1.552714864124189, - "grad_norm": 6.350049018859863, - "learning_rate": 2.412141893126352e-05, - "loss": 0.4618, - "step": 175640 - }, - { - "epoch": 1.5528032673845011, - "grad_norm": 13.56799030303955, - "learning_rate": 2.411994554359165e-05, - "loss": 0.4577, - "step": 175650 - }, - { - "epoch": 1.5528916706448133, - "grad_norm": 3.8896825313568115, - "learning_rate": 2.4118472155919778e-05, - "loss": 0.6352, - "step": 175660 - }, - { - "epoch": 1.5529800739051256, - "grad_norm": 2.473496675491333, - "learning_rate": 2.411699876824791e-05, - "loss": 0.6829, - "step": 175670 - }, - { - "epoch": 1.553068477165438, - "grad_norm": 3.9788055419921875, - "learning_rate": 2.4115525380576038e-05, - "loss": 0.5836, - "step": 175680 - }, - { - "epoch": 1.55315688042575, - "grad_norm": 2.101442575454712, - "learning_rate": 2.4114051992904167e-05, - "loss": 0.6256, - "step": 175690 - }, - { - "epoch": 1.5532452836860622, - "grad_norm": 4.8250041007995605, - "learning_rate": 2.4112578605232295e-05, - "loss": 0.6183, - "step": 175700 - }, - { - "epoch": 1.5533336869463747, - "grad_norm": 5.208817481994629, - "learning_rate": 2.4111105217560427e-05, - "loss": 0.6138, - "step": 175710 - }, - { - "epoch": 1.5534220902066869, - "grad_norm": 3.2474365234375, - "learning_rate": 2.4109631829888555e-05, - "loss": 0.5583, - "step": 175720 - }, - { - "epoch": 1.553510493466999, - "grad_norm": 2.2283647060394287, - "learning_rate": 2.4108158442216683e-05, - "loss": 0.5087, - "step": 175730 - }, - { - "epoch": 1.5535988967273113, - "grad_norm": 3.2932844161987305, - "learning_rate": 2.4106685054544812e-05, - "loss": 0.5104, - "step": 175740 - }, - { - "epoch": 1.5536872999876237, - "grad_norm": 3.431257963180542, - "learning_rate": 2.4105211666872943e-05, - "loss": 0.5497, - "step": 175750 - }, - { - "epoch": 1.5537757032479358, - "grad_norm": 1.9445186853408813, - "learning_rate": 2.4103738279201072e-05, - "loss": 0.5849, - "step": 175760 - }, - { - "epoch": 1.553864106508248, - "grad_norm": 8.358589172363281, - "learning_rate": 2.41022648915292e-05, - "loss": 0.6241, - "step": 175770 - }, - { - "epoch": 1.5539525097685603, - "grad_norm": 2.708808660507202, - "learning_rate": 2.4100791503857332e-05, - "loss": 0.7163, - "step": 175780 - }, - { - "epoch": 1.5540409130288726, - "grad_norm": 1.8191673755645752, - "learning_rate": 2.409931811618546e-05, - "loss": 0.5747, - "step": 175790 - }, - { - "epoch": 1.5541293162891847, - "grad_norm": 6.4892144203186035, - "learning_rate": 2.409784472851359e-05, - "loss": 0.6796, - "step": 175800 - }, - { - "epoch": 1.5542177195494968, - "grad_norm": 2.453010082244873, - "learning_rate": 2.4096371340841717e-05, - "loss": 0.5122, - "step": 175810 - }, - { - "epoch": 1.5543061228098092, - "grad_norm": 3.2654922008514404, - "learning_rate": 2.409489795316985e-05, - "loss": 0.619, - "step": 175820 - }, - { - "epoch": 1.5543945260701215, - "grad_norm": 3.2870142459869385, - "learning_rate": 2.4093424565497977e-05, - "loss": 0.6954, - "step": 175830 - }, - { - "epoch": 1.5544829293304336, - "grad_norm": 1.6599255800247192, - "learning_rate": 2.4091951177826105e-05, - "loss": 0.6912, - "step": 175840 - }, - { - "epoch": 1.554571332590746, - "grad_norm": 1.4668525457382202, - "learning_rate": 2.4090477790154237e-05, - "loss": 0.5591, - "step": 175850 - }, - { - "epoch": 1.5546597358510583, - "grad_norm": 5.512995719909668, - "learning_rate": 2.4089004402482366e-05, - "loss": 0.6056, - "step": 175860 - }, - { - "epoch": 1.5547481391113704, - "grad_norm": 1.3192214965820312, - "learning_rate": 2.4087531014810494e-05, - "loss": 0.4609, - "step": 175870 - }, - { - "epoch": 1.5548365423716826, - "grad_norm": 2.4650135040283203, - "learning_rate": 2.4086057627138622e-05, - "loss": 0.7683, - "step": 175880 - }, - { - "epoch": 1.554924945631995, - "grad_norm": 4.417902946472168, - "learning_rate": 2.4084584239466754e-05, - "loss": 0.5397, - "step": 175890 - }, - { - "epoch": 1.5550133488923072, - "grad_norm": 2.7622385025024414, - "learning_rate": 2.4083110851794882e-05, - "loss": 0.5373, - "step": 175900 - }, - { - "epoch": 1.5551017521526194, - "grad_norm": 3.459970474243164, - "learning_rate": 2.408163746412301e-05, - "loss": 0.6649, - "step": 175910 - }, - { - "epoch": 1.5551901554129315, - "grad_norm": 1.4808686971664429, - "learning_rate": 2.408016407645114e-05, - "loss": 0.6733, - "step": 175920 - }, - { - "epoch": 1.5552785586732438, - "grad_norm": 3.042663097381592, - "learning_rate": 2.407869068877927e-05, - "loss": 0.6793, - "step": 175930 - }, - { - "epoch": 1.5553669619335562, - "grad_norm": 8.79601001739502, - "learning_rate": 2.40772173011074e-05, - "loss": 0.5742, - "step": 175940 - }, - { - "epoch": 1.5554553651938683, - "grad_norm": 3.032383680343628, - "learning_rate": 2.4075743913435528e-05, - "loss": 0.6242, - "step": 175950 - }, - { - "epoch": 1.5555437684541806, - "grad_norm": 4.006124496459961, - "learning_rate": 2.407427052576366e-05, - "loss": 0.5409, - "step": 175960 - }, - { - "epoch": 1.555632171714493, - "grad_norm": 3.9470105171203613, - "learning_rate": 2.4072797138091788e-05, - "loss": 0.628, - "step": 175970 - }, - { - "epoch": 1.555720574974805, - "grad_norm": 6.362183094024658, - "learning_rate": 2.4071323750419916e-05, - "loss": 0.6373, - "step": 175980 - }, - { - "epoch": 1.5558089782351172, - "grad_norm": 1.9918493032455444, - "learning_rate": 2.4069850362748044e-05, - "loss": 0.5338, - "step": 175990 - }, - { - "epoch": 1.5558973814954296, - "grad_norm": 1.1010850667953491, - "learning_rate": 2.4068376975076176e-05, - "loss": 0.7065, - "step": 176000 - }, - { - "epoch": 1.555985784755742, - "grad_norm": 1.8384859561920166, - "learning_rate": 2.4066903587404304e-05, - "loss": 0.6622, - "step": 176010 - }, - { - "epoch": 1.556074188016054, - "grad_norm": 1.623852014541626, - "learning_rate": 2.4065430199732433e-05, - "loss": 0.6095, - "step": 176020 - }, - { - "epoch": 1.5561625912763661, - "grad_norm": 2.5296456813812256, - "learning_rate": 2.4063956812060564e-05, - "loss": 0.7166, - "step": 176030 - }, - { - "epoch": 1.5562509945366785, - "grad_norm": 4.086573123931885, - "learning_rate": 2.4062483424388693e-05, - "loss": 0.6613, - "step": 176040 - }, - { - "epoch": 1.5563393977969908, - "grad_norm": 3.509824275970459, - "learning_rate": 2.406101003671682e-05, - "loss": 0.6629, - "step": 176050 - }, - { - "epoch": 1.556427801057303, - "grad_norm": 2.155970811843872, - "learning_rate": 2.405953664904495e-05, - "loss": 0.4275, - "step": 176060 - }, - { - "epoch": 1.5565162043176153, - "grad_norm": 6.274196147918701, - "learning_rate": 2.405806326137308e-05, - "loss": 0.6362, - "step": 176070 - }, - { - "epoch": 1.5566046075779276, - "grad_norm": 1.7009046077728271, - "learning_rate": 2.405658987370121e-05, - "loss": 0.5711, - "step": 176080 - }, - { - "epoch": 1.5566930108382397, - "grad_norm": 4.581592559814453, - "learning_rate": 2.4055116486029338e-05, - "loss": 0.7456, - "step": 176090 - }, - { - "epoch": 1.5567814140985519, - "grad_norm": 6.589500427246094, - "learning_rate": 2.4053643098357466e-05, - "loss": 0.5122, - "step": 176100 - }, - { - "epoch": 1.5568698173588642, - "grad_norm": 6.289793014526367, - "learning_rate": 2.4052169710685598e-05, - "loss": 0.7746, - "step": 176110 - }, - { - "epoch": 1.5569582206191765, - "grad_norm": 1.7730607986450195, - "learning_rate": 2.4050696323013726e-05, - "loss": 0.5726, - "step": 176120 - }, - { - "epoch": 1.5570466238794887, - "grad_norm": 7.2734575271606445, - "learning_rate": 2.4049222935341855e-05, - "loss": 0.5616, - "step": 176130 - }, - { - "epoch": 1.5571350271398008, - "grad_norm": 2.77254581451416, - "learning_rate": 2.4047749547669987e-05, - "loss": 0.5998, - "step": 176140 - }, - { - "epoch": 1.5572234304001131, - "grad_norm": 1.6330997943878174, - "learning_rate": 2.4046276159998115e-05, - "loss": 0.4892, - "step": 176150 - }, - { - "epoch": 1.5573118336604255, - "grad_norm": 1.2648051977157593, - "learning_rate": 2.4044802772326243e-05, - "loss": 0.5772, - "step": 176160 - }, - { - "epoch": 1.5574002369207376, - "grad_norm": 8.252049446105957, - "learning_rate": 2.404332938465437e-05, - "loss": 0.6662, - "step": 176170 - }, - { - "epoch": 1.55748864018105, - "grad_norm": 9.004280090332031, - "learning_rate": 2.4041855996982503e-05, - "loss": 0.6576, - "step": 176180 - }, - { - "epoch": 1.5575770434413623, - "grad_norm": 2.663370132446289, - "learning_rate": 2.4040382609310632e-05, - "loss": 0.5891, - "step": 176190 - }, - { - "epoch": 1.5576654467016744, - "grad_norm": 8.976247787475586, - "learning_rate": 2.403890922163876e-05, - "loss": 0.554, - "step": 176200 - }, - { - "epoch": 1.5577538499619865, - "grad_norm": 10.197449684143066, - "learning_rate": 2.403743583396689e-05, - "loss": 0.6058, - "step": 176210 - }, - { - "epoch": 1.5578422532222989, - "grad_norm": 6.126716613769531, - "learning_rate": 2.403596244629502e-05, - "loss": 0.5604, - "step": 176220 - }, - { - "epoch": 1.5579306564826112, - "grad_norm": 1.966170072555542, - "learning_rate": 2.403448905862315e-05, - "loss": 0.5347, - "step": 176230 - }, - { - "epoch": 1.5580190597429233, - "grad_norm": 6.774153709411621, - "learning_rate": 2.4033015670951277e-05, - "loss": 0.6212, - "step": 176240 - }, - { - "epoch": 1.5581074630032354, - "grad_norm": 3.204974412918091, - "learning_rate": 2.403154228327941e-05, - "loss": 0.6954, - "step": 176250 - }, - { - "epoch": 1.5581958662635478, - "grad_norm": 2.6879467964172363, - "learning_rate": 2.4030068895607537e-05, - "loss": 0.595, - "step": 176260 - }, - { - "epoch": 1.5582842695238601, - "grad_norm": 2.4986608028411865, - "learning_rate": 2.4028595507935665e-05, - "loss": 0.4526, - "step": 176270 - }, - { - "epoch": 1.5583726727841722, - "grad_norm": 17.314674377441406, - "learning_rate": 2.4027122120263794e-05, - "loss": 0.5682, - "step": 176280 - }, - { - "epoch": 1.5584610760444844, - "grad_norm": 10.226405143737793, - "learning_rate": 2.4025648732591925e-05, - "loss": 0.6113, - "step": 176290 - }, - { - "epoch": 1.558549479304797, - "grad_norm": 2.781558036804199, - "learning_rate": 2.4024175344920054e-05, - "loss": 0.5478, - "step": 176300 - }, - { - "epoch": 1.558637882565109, - "grad_norm": 2.374439001083374, - "learning_rate": 2.4022701957248182e-05, - "loss": 0.4678, - "step": 176310 - }, - { - "epoch": 1.5587262858254212, - "grad_norm": 1.906740427017212, - "learning_rate": 2.4021228569576314e-05, - "loss": 0.6328, - "step": 176320 - }, - { - "epoch": 1.5588146890857335, - "grad_norm": 0.9516133069992065, - "learning_rate": 2.4019755181904442e-05, - "loss": 0.5999, - "step": 176330 - }, - { - "epoch": 1.5589030923460458, - "grad_norm": 2.762033462524414, - "learning_rate": 2.401828179423257e-05, - "loss": 0.6909, - "step": 176340 - }, - { - "epoch": 1.558991495606358, - "grad_norm": 1.5850287675857544, - "learning_rate": 2.4016808406560702e-05, - "loss": 0.5599, - "step": 176350 - }, - { - "epoch": 1.55907989886667, - "grad_norm": 1.2970778942108154, - "learning_rate": 2.401533501888883e-05, - "loss": 0.5156, - "step": 176360 - }, - { - "epoch": 1.5591683021269824, - "grad_norm": 2.068624973297119, - "learning_rate": 2.401386163121696e-05, - "loss": 0.6752, - "step": 176370 - }, - { - "epoch": 1.5592567053872948, - "grad_norm": 4.437560081481934, - "learning_rate": 2.401238824354509e-05, - "loss": 0.6849, - "step": 176380 - }, - { - "epoch": 1.559345108647607, - "grad_norm": 11.547019958496094, - "learning_rate": 2.401091485587322e-05, - "loss": 0.5907, - "step": 176390 - }, - { - "epoch": 1.559433511907919, - "grad_norm": 17.42321014404297, - "learning_rate": 2.4009441468201347e-05, - "loss": 0.7714, - "step": 176400 - }, - { - "epoch": 1.5595219151682314, - "grad_norm": 16.337127685546875, - "learning_rate": 2.400796808052948e-05, - "loss": 0.7394, - "step": 176410 - }, - { - "epoch": 1.5596103184285437, - "grad_norm": 4.904205799102783, - "learning_rate": 2.4006494692857608e-05, - "loss": 0.7249, - "step": 176420 - }, - { - "epoch": 1.5596987216888558, - "grad_norm": 5.072138786315918, - "learning_rate": 2.4005021305185736e-05, - "loss": 0.5635, - "step": 176430 - }, - { - "epoch": 1.5597871249491682, - "grad_norm": 0.8053647875785828, - "learning_rate": 2.4003547917513868e-05, - "loss": 0.5783, - "step": 176440 - }, - { - "epoch": 1.5598755282094805, - "grad_norm": 2.298722743988037, - "learning_rate": 2.4002074529841996e-05, - "loss": 0.5057, - "step": 176450 - }, - { - "epoch": 1.5599639314697926, - "grad_norm": 3.5129120349884033, - "learning_rate": 2.4000601142170124e-05, - "loss": 0.6155, - "step": 176460 - }, - { - "epoch": 1.5600523347301047, - "grad_norm": 6.439996242523193, - "learning_rate": 2.3999127754498256e-05, - "loss": 0.4902, - "step": 176470 - }, - { - "epoch": 1.560140737990417, - "grad_norm": 2.6693789958953857, - "learning_rate": 2.3997654366826384e-05, - "loss": 0.6106, - "step": 176480 - }, - { - "epoch": 1.5602291412507294, - "grad_norm": 2.057903528213501, - "learning_rate": 2.3996180979154513e-05, - "loss": 0.4878, - "step": 176490 - }, - { - "epoch": 1.5603175445110415, - "grad_norm": 4.72182035446167, - "learning_rate": 2.3994707591482645e-05, - "loss": 0.5856, - "step": 176500 - }, - { - "epoch": 1.5604059477713537, - "grad_norm": 7.757410049438477, - "learning_rate": 2.3993234203810773e-05, - "loss": 0.5483, - "step": 176510 - }, - { - "epoch": 1.560494351031666, - "grad_norm": 5.265251159667969, - "learning_rate": 2.39917608161389e-05, - "loss": 0.5218, - "step": 176520 - }, - { - "epoch": 1.5605827542919783, - "grad_norm": 6.178158283233643, - "learning_rate": 2.399028742846703e-05, - "loss": 0.5849, - "step": 176530 - }, - { - "epoch": 1.5606711575522905, - "grad_norm": 4.1125874519348145, - "learning_rate": 2.398881404079516e-05, - "loss": 0.6595, - "step": 176540 - }, - { - "epoch": 1.5607595608126028, - "grad_norm": 1.4558113813400269, - "learning_rate": 2.398734065312329e-05, - "loss": 0.5954, - "step": 176550 - }, - { - "epoch": 1.5608479640729152, - "grad_norm": 3.422471523284912, - "learning_rate": 2.3985867265451418e-05, - "loss": 0.551, - "step": 176560 - }, - { - "epoch": 1.5609363673332273, - "grad_norm": 2.989250659942627, - "learning_rate": 2.3984393877779546e-05, - "loss": 0.5014, - "step": 176570 - }, - { - "epoch": 1.5610247705935394, - "grad_norm": 9.05696964263916, - "learning_rate": 2.3982920490107678e-05, - "loss": 0.7459, - "step": 176580 - }, - { - "epoch": 1.5611131738538517, - "grad_norm": 2.3023223876953125, - "learning_rate": 2.3981447102435807e-05, - "loss": 0.5984, - "step": 176590 - }, - { - "epoch": 1.561201577114164, - "grad_norm": 6.538005352020264, - "learning_rate": 2.3979973714763935e-05, - "loss": 0.6604, - "step": 176600 - }, - { - "epoch": 1.5612899803744762, - "grad_norm": 7.982250690460205, - "learning_rate": 2.3978500327092067e-05, - "loss": 0.5231, - "step": 176610 - }, - { - "epoch": 1.5613783836347883, - "grad_norm": 4.236746311187744, - "learning_rate": 2.3977026939420195e-05, - "loss": 0.5771, - "step": 176620 - }, - { - "epoch": 1.5614667868951007, - "grad_norm": 1.882865071296692, - "learning_rate": 2.3975553551748323e-05, - "loss": 0.4318, - "step": 176630 - }, - { - "epoch": 1.561555190155413, - "grad_norm": 4.176548480987549, - "learning_rate": 2.397408016407645e-05, - "loss": 0.6464, - "step": 176640 - }, - { - "epoch": 1.5616435934157251, - "grad_norm": 8.582466125488281, - "learning_rate": 2.3972606776404583e-05, - "loss": 0.5813, - "step": 176650 - }, - { - "epoch": 1.5617319966760375, - "grad_norm": 5.795729637145996, - "learning_rate": 2.3971133388732712e-05, - "loss": 0.5705, - "step": 176660 - }, - { - "epoch": 1.5618203999363498, - "grad_norm": 6.867427349090576, - "learning_rate": 2.396966000106084e-05, - "loss": 0.5965, - "step": 176670 - }, - { - "epoch": 1.561908803196662, - "grad_norm": 1.722930908203125, - "learning_rate": 2.3968186613388972e-05, - "loss": 0.5778, - "step": 176680 - }, - { - "epoch": 1.561997206456974, - "grad_norm": 21.46084976196289, - "learning_rate": 2.39667132257171e-05, - "loss": 0.6877, - "step": 176690 - }, - { - "epoch": 1.5620856097172864, - "grad_norm": 1.9725826978683472, - "learning_rate": 2.396523983804523e-05, - "loss": 0.5208, - "step": 176700 - }, - { - "epoch": 1.5621740129775987, - "grad_norm": 2.8085975646972656, - "learning_rate": 2.3963766450373357e-05, - "loss": 0.5655, - "step": 176710 - }, - { - "epoch": 1.5622624162379108, - "grad_norm": 1.2130274772644043, - "learning_rate": 2.396229306270149e-05, - "loss": 0.5443, - "step": 176720 - }, - { - "epoch": 1.562350819498223, - "grad_norm": 1.321975588798523, - "learning_rate": 2.3960819675029617e-05, - "loss": 0.6458, - "step": 176730 - }, - { - "epoch": 1.5624392227585353, - "grad_norm": 2.276165008544922, - "learning_rate": 2.3959346287357745e-05, - "loss": 0.545, - "step": 176740 - }, - { - "epoch": 1.5625276260188476, - "grad_norm": 1.0494939088821411, - "learning_rate": 2.3957872899685874e-05, - "loss": 0.6316, - "step": 176750 - }, - { - "epoch": 1.5626160292791598, - "grad_norm": 3.4098010063171387, - "learning_rate": 2.3956399512014005e-05, - "loss": 0.5825, - "step": 176760 - }, - { - "epoch": 1.562704432539472, - "grad_norm": 1.3356003761291504, - "learning_rate": 2.3954926124342134e-05, - "loss": 0.5101, - "step": 176770 - }, - { - "epoch": 1.5627928357997845, - "grad_norm": 2.826198101043701, - "learning_rate": 2.3953452736670262e-05, - "loss": 0.6422, - "step": 176780 - }, - { - "epoch": 1.5628812390600966, - "grad_norm": 7.472723484039307, - "learning_rate": 2.3951979348998394e-05, - "loss": 0.5543, - "step": 176790 - }, - { - "epoch": 1.5629696423204087, - "grad_norm": 1.6344304084777832, - "learning_rate": 2.3950505961326522e-05, - "loss": 0.5944, - "step": 176800 - }, - { - "epoch": 1.563058045580721, - "grad_norm": 1.928816556930542, - "learning_rate": 2.394903257365465e-05, - "loss": 0.5692, - "step": 176810 - }, - { - "epoch": 1.5631464488410334, - "grad_norm": 1.4873366355895996, - "learning_rate": 2.394755918598278e-05, - "loss": 0.6319, - "step": 176820 - }, - { - "epoch": 1.5632348521013455, - "grad_norm": 3.338905096054077, - "learning_rate": 2.394608579831091e-05, - "loss": 0.576, - "step": 176830 - }, - { - "epoch": 1.5633232553616576, - "grad_norm": 10.962322235107422, - "learning_rate": 2.394461241063904e-05, - "loss": 0.6467, - "step": 176840 - }, - { - "epoch": 1.56341165862197, - "grad_norm": 2.717649221420288, - "learning_rate": 2.3943139022967167e-05, - "loss": 0.6829, - "step": 176850 - }, - { - "epoch": 1.5635000618822823, - "grad_norm": 6.951993942260742, - "learning_rate": 2.3941665635295296e-05, - "loss": 0.5802, - "step": 176860 - }, - { - "epoch": 1.5635884651425944, - "grad_norm": 1.9833133220672607, - "learning_rate": 2.3940192247623428e-05, - "loss": 0.5176, - "step": 176870 - }, - { - "epoch": 1.5636768684029065, - "grad_norm": 1.712316870689392, - "learning_rate": 2.3938718859951556e-05, - "loss": 0.5376, - "step": 176880 - }, - { - "epoch": 1.563765271663219, - "grad_norm": 1.2919236421585083, - "learning_rate": 2.3937245472279684e-05, - "loss": 0.5559, - "step": 176890 - }, - { - "epoch": 1.5638536749235312, - "grad_norm": 11.732109069824219, - "learning_rate": 2.3935772084607816e-05, - "loss": 0.7083, - "step": 176900 - }, - { - "epoch": 1.5639420781838433, - "grad_norm": 4.852503299713135, - "learning_rate": 2.3934298696935944e-05, - "loss": 0.513, - "step": 176910 - }, - { - "epoch": 1.5640304814441557, - "grad_norm": 1.8035823106765747, - "learning_rate": 2.3932825309264073e-05, - "loss": 0.6046, - "step": 176920 - }, - { - "epoch": 1.564118884704468, - "grad_norm": 1.3335148096084595, - "learning_rate": 2.39313519215922e-05, - "loss": 0.6209, - "step": 176930 - }, - { - "epoch": 1.5642072879647801, - "grad_norm": 2.9932949542999268, - "learning_rate": 2.3929878533920333e-05, - "loss": 0.5952, - "step": 176940 - }, - { - "epoch": 1.5642956912250923, - "grad_norm": 2.053060531616211, - "learning_rate": 2.392840514624846e-05, - "loss": 0.6696, - "step": 176950 - }, - { - "epoch": 1.5643840944854046, - "grad_norm": 8.876794815063477, - "learning_rate": 2.392693175857659e-05, - "loss": 0.6265, - "step": 176960 - }, - { - "epoch": 1.564472497745717, - "grad_norm": 1.3175852298736572, - "learning_rate": 2.392545837090472e-05, - "loss": 0.5908, - "step": 176970 - }, - { - "epoch": 1.564560901006029, - "grad_norm": 1.8960905075073242, - "learning_rate": 2.392398498323285e-05, - "loss": 0.5778, - "step": 176980 - }, - { - "epoch": 1.5646493042663412, - "grad_norm": 4.270967483520508, - "learning_rate": 2.3922511595560978e-05, - "loss": 0.6514, - "step": 176990 - }, - { - "epoch": 1.5647377075266535, - "grad_norm": 4.1663689613342285, - "learning_rate": 2.3921038207889106e-05, - "loss": 0.5853, - "step": 177000 - }, - { - "epoch": 1.5648261107869659, - "grad_norm": 1.3195915222167969, - "learning_rate": 2.3919564820217238e-05, - "loss": 0.6515, - "step": 177010 - }, - { - "epoch": 1.564914514047278, - "grad_norm": 12.04109001159668, - "learning_rate": 2.3918091432545366e-05, - "loss": 0.5227, - "step": 177020 - }, - { - "epoch": 1.5650029173075903, - "grad_norm": 1.4092333316802979, - "learning_rate": 2.3916618044873495e-05, - "loss": 0.6642, - "step": 177030 - }, - { - "epoch": 1.5650913205679027, - "grad_norm": 3.3699228763580322, - "learning_rate": 2.3915144657201623e-05, - "loss": 0.5208, - "step": 177040 - }, - { - "epoch": 1.5651797238282148, - "grad_norm": 2.845052480697632, - "learning_rate": 2.3913671269529755e-05, - "loss": 0.6047, - "step": 177050 - }, - { - "epoch": 1.565268127088527, - "grad_norm": 5.391455173492432, - "learning_rate": 2.3912197881857883e-05, - "loss": 0.5443, - "step": 177060 - }, - { - "epoch": 1.5653565303488393, - "grad_norm": 1.5082225799560547, - "learning_rate": 2.391072449418601e-05, - "loss": 0.6542, - "step": 177070 - }, - { - "epoch": 1.5654449336091516, - "grad_norm": 0.50357586145401, - "learning_rate": 2.3909251106514143e-05, - "loss": 0.4891, - "step": 177080 - }, - { - "epoch": 1.5655333368694637, - "grad_norm": 2.5022904872894287, - "learning_rate": 2.390777771884227e-05, - "loss": 0.6431, - "step": 177090 - }, - { - "epoch": 1.5656217401297758, - "grad_norm": 1.3826407194137573, - "learning_rate": 2.39063043311704e-05, - "loss": 0.5379, - "step": 177100 - }, - { - "epoch": 1.5657101433900882, - "grad_norm": 2.0808982849121094, - "learning_rate": 2.390483094349853e-05, - "loss": 0.4533, - "step": 177110 - }, - { - "epoch": 1.5657985466504005, - "grad_norm": 1.225738286972046, - "learning_rate": 2.390335755582666e-05, - "loss": 0.6256, - "step": 177120 - }, - { - "epoch": 1.5658869499107126, - "grad_norm": 1.853959321975708, - "learning_rate": 2.390188416815479e-05, - "loss": 0.5257, - "step": 177130 - }, - { - "epoch": 1.565975353171025, - "grad_norm": 1.8163670301437378, - "learning_rate": 2.3900410780482917e-05, - "loss": 0.5124, - "step": 177140 - }, - { - "epoch": 1.5660637564313373, - "grad_norm": 2.156202554702759, - "learning_rate": 2.389893739281105e-05, - "loss": 0.4939, - "step": 177150 - }, - { - "epoch": 1.5661521596916494, - "grad_norm": 1.2103588581085205, - "learning_rate": 2.3897464005139177e-05, - "loss": 0.527, - "step": 177160 - }, - { - "epoch": 1.5662405629519616, - "grad_norm": 7.839535713195801, - "learning_rate": 2.3895990617467305e-05, - "loss": 0.6572, - "step": 177170 - }, - { - "epoch": 1.566328966212274, - "grad_norm": 1.2732614278793335, - "learning_rate": 2.3894517229795434e-05, - "loss": 0.7046, - "step": 177180 - }, - { - "epoch": 1.5664173694725863, - "grad_norm": 1.1127547025680542, - "learning_rate": 2.3893043842123565e-05, - "loss": 0.6706, - "step": 177190 - }, - { - "epoch": 1.5665057727328984, - "grad_norm": 1.468625545501709, - "learning_rate": 2.3891570454451694e-05, - "loss": 0.5455, - "step": 177200 - }, - { - "epoch": 1.5665941759932105, - "grad_norm": 1.4811850786209106, - "learning_rate": 2.3890097066779822e-05, - "loss": 0.5632, - "step": 177210 - }, - { - "epoch": 1.5666825792535228, - "grad_norm": 13.830153465270996, - "learning_rate": 2.388862367910795e-05, - "loss": 0.6757, - "step": 177220 - }, - { - "epoch": 1.5667709825138352, - "grad_norm": 2.274662971496582, - "learning_rate": 2.3887150291436082e-05, - "loss": 0.5751, - "step": 177230 - }, - { - "epoch": 1.5668593857741473, - "grad_norm": 3.429499626159668, - "learning_rate": 2.388567690376421e-05, - "loss": 0.5757, - "step": 177240 - }, - { - "epoch": 1.5669477890344596, - "grad_norm": 4.459138870239258, - "learning_rate": 2.388420351609234e-05, - "loss": 0.6723, - "step": 177250 - }, - { - "epoch": 1.567036192294772, - "grad_norm": 2.4164700508117676, - "learning_rate": 2.388273012842047e-05, - "loss": 0.5153, - "step": 177260 - }, - { - "epoch": 1.567124595555084, - "grad_norm": 2.449877977371216, - "learning_rate": 2.38812567407486e-05, - "loss": 0.8246, - "step": 177270 - }, - { - "epoch": 1.5672129988153962, - "grad_norm": 3.5852701663970947, - "learning_rate": 2.3879783353076727e-05, - "loss": 0.7463, - "step": 177280 - }, - { - "epoch": 1.5673014020757086, - "grad_norm": 1.6659650802612305, - "learning_rate": 2.387830996540486e-05, - "loss": 0.5225, - "step": 177290 - }, - { - "epoch": 1.567389805336021, - "grad_norm": 2.45802903175354, - "learning_rate": 2.3876836577732987e-05, - "loss": 0.5015, - "step": 177300 - }, - { - "epoch": 1.567478208596333, - "grad_norm": 1.3427000045776367, - "learning_rate": 2.3875363190061116e-05, - "loss": 0.579, - "step": 177310 - }, - { - "epoch": 1.5675666118566451, - "grad_norm": 5.278824329376221, - "learning_rate": 2.3873889802389248e-05, - "loss": 0.6429, - "step": 177320 - }, - { - "epoch": 1.5676550151169575, - "grad_norm": 1.3126202821731567, - "learning_rate": 2.3872416414717376e-05, - "loss": 0.5997, - "step": 177330 - }, - { - "epoch": 1.5677434183772698, - "grad_norm": 2.5202386379241943, - "learning_rate": 2.3870943027045504e-05, - "loss": 0.5816, - "step": 177340 - }, - { - "epoch": 1.567831821637582, - "grad_norm": 5.427288055419922, - "learning_rate": 2.3869469639373636e-05, - "loss": 0.7357, - "step": 177350 - }, - { - "epoch": 1.5679202248978943, - "grad_norm": 5.297801971435547, - "learning_rate": 2.3867996251701764e-05, - "loss": 0.5988, - "step": 177360 - }, - { - "epoch": 1.5680086281582066, - "grad_norm": 2.1023435592651367, - "learning_rate": 2.3866522864029893e-05, - "loss": 0.6773, - "step": 177370 - }, - { - "epoch": 1.5680970314185187, - "grad_norm": 3.128377914428711, - "learning_rate": 2.3865049476358024e-05, - "loss": 0.458, - "step": 177380 - }, - { - "epoch": 1.5681854346788309, - "grad_norm": 1.211957335472107, - "learning_rate": 2.3863576088686153e-05, - "loss": 0.568, - "step": 177390 - }, - { - "epoch": 1.5682738379391432, - "grad_norm": 2.160508871078491, - "learning_rate": 2.386210270101428e-05, - "loss": 0.73, - "step": 177400 - }, - { - "epoch": 1.5683622411994556, - "grad_norm": 1.121775507926941, - "learning_rate": 2.3860629313342413e-05, - "loss": 0.6873, - "step": 177410 - }, - { - "epoch": 1.5684506444597677, - "grad_norm": 9.46325969696045, - "learning_rate": 2.385915592567054e-05, - "loss": 0.5065, - "step": 177420 - }, - { - "epoch": 1.5685390477200798, - "grad_norm": 10.104605674743652, - "learning_rate": 2.385768253799867e-05, - "loss": 0.6625, - "step": 177430 - }, - { - "epoch": 1.5686274509803921, - "grad_norm": 4.208607196807861, - "learning_rate": 2.38562091503268e-05, - "loss": 0.5357, - "step": 177440 - }, - { - "epoch": 1.5687158542407045, - "grad_norm": 7.169487476348877, - "learning_rate": 2.385473576265493e-05, - "loss": 0.6935, - "step": 177450 - }, - { - "epoch": 1.5688042575010166, - "grad_norm": 2.8854124546051025, - "learning_rate": 2.3853262374983058e-05, - "loss": 0.5694, - "step": 177460 - }, - { - "epoch": 1.5688926607613287, - "grad_norm": 2.144292116165161, - "learning_rate": 2.3851788987311186e-05, - "loss": 0.5011, - "step": 177470 - }, - { - "epoch": 1.5689810640216413, - "grad_norm": 2.791090965270996, - "learning_rate": 2.3850315599639318e-05, - "loss": 0.742, - "step": 177480 - }, - { - "epoch": 1.5690694672819534, - "grad_norm": 10.48210620880127, - "learning_rate": 2.3848842211967446e-05, - "loss": 0.4566, - "step": 177490 - }, - { - "epoch": 1.5691578705422655, - "grad_norm": 2.357089042663574, - "learning_rate": 2.3847368824295575e-05, - "loss": 0.5526, - "step": 177500 - }, - { - "epoch": 1.5692462738025779, - "grad_norm": 2.4185054302215576, - "learning_rate": 2.3845895436623703e-05, - "loss": 0.5614, - "step": 177510 - }, - { - "epoch": 1.5693346770628902, - "grad_norm": 2.3175907135009766, - "learning_rate": 2.3844422048951835e-05, - "loss": 0.6569, - "step": 177520 - }, - { - "epoch": 1.5694230803232023, - "grad_norm": 5.830840110778809, - "learning_rate": 2.3842948661279963e-05, - "loss": 0.6688, - "step": 177530 - }, - { - "epoch": 1.5695114835835144, - "grad_norm": 2.5840630531311035, - "learning_rate": 2.384147527360809e-05, - "loss": 0.5568, - "step": 177540 - }, - { - "epoch": 1.5695998868438268, - "grad_norm": 3.6981008052825928, - "learning_rate": 2.3840001885936223e-05, - "loss": 0.455, - "step": 177550 - }, - { - "epoch": 1.5696882901041391, - "grad_norm": 2.26568865776062, - "learning_rate": 2.3838528498264352e-05, - "loss": 0.6471, - "step": 177560 - }, - { - "epoch": 1.5697766933644512, - "grad_norm": 3.4982657432556152, - "learning_rate": 2.383705511059248e-05, - "loss": 0.6207, - "step": 177570 - }, - { - "epoch": 1.5698650966247634, - "grad_norm": 11.61485481262207, - "learning_rate": 2.383558172292061e-05, - "loss": 0.6342, - "step": 177580 - }, - { - "epoch": 1.5699534998850757, - "grad_norm": 2.927955150604248, - "learning_rate": 2.383410833524874e-05, - "loss": 0.6117, - "step": 177590 - }, - { - "epoch": 1.570041903145388, - "grad_norm": 3.3633296489715576, - "learning_rate": 2.383263494757687e-05, - "loss": 0.6254, - "step": 177600 - }, - { - "epoch": 1.5701303064057002, - "grad_norm": 3.2824857234954834, - "learning_rate": 2.3831161559904997e-05, - "loss": 0.6156, - "step": 177610 - }, - { - "epoch": 1.5702187096660125, - "grad_norm": 1.9438114166259766, - "learning_rate": 2.382968817223313e-05, - "loss": 0.6046, - "step": 177620 - }, - { - "epoch": 1.5703071129263249, - "grad_norm": 9.858379364013672, - "learning_rate": 2.3828214784561257e-05, - "loss": 0.5039, - "step": 177630 - }, - { - "epoch": 1.570395516186637, - "grad_norm": 1.7021639347076416, - "learning_rate": 2.3826741396889385e-05, - "loss": 0.5935, - "step": 177640 - }, - { - "epoch": 1.570483919446949, - "grad_norm": 2.1032676696777344, - "learning_rate": 2.3825268009217514e-05, - "loss": 0.5879, - "step": 177650 - }, - { - "epoch": 1.5705723227072614, - "grad_norm": 1.8284764289855957, - "learning_rate": 2.3823794621545645e-05, - "loss": 0.5119, - "step": 177660 - }, - { - "epoch": 1.5706607259675738, - "grad_norm": 1.3647922277450562, - "learning_rate": 2.3822321233873774e-05, - "loss": 0.5886, - "step": 177670 - }, - { - "epoch": 1.570749129227886, - "grad_norm": 2.7354862689971924, - "learning_rate": 2.3820847846201902e-05, - "loss": 0.6214, - "step": 177680 - }, - { - "epoch": 1.570837532488198, - "grad_norm": 3.5631704330444336, - "learning_rate": 2.381937445853003e-05, - "loss": 0.5461, - "step": 177690 - }, - { - "epoch": 1.5709259357485104, - "grad_norm": 1.2752279043197632, - "learning_rate": 2.3817901070858162e-05, - "loss": 0.5554, - "step": 177700 - }, - { - "epoch": 1.5710143390088227, - "grad_norm": 3.2722902297973633, - "learning_rate": 2.381642768318629e-05, - "loss": 0.6625, - "step": 177710 - }, - { - "epoch": 1.5711027422691348, - "grad_norm": 1.4920198917388916, - "learning_rate": 2.381495429551442e-05, - "loss": 0.6535, - "step": 177720 - }, - { - "epoch": 1.5711911455294472, - "grad_norm": 2.660507917404175, - "learning_rate": 2.381348090784255e-05, - "loss": 0.6356, - "step": 177730 - }, - { - "epoch": 1.5712795487897595, - "grad_norm": 2.067870616912842, - "learning_rate": 2.381200752017068e-05, - "loss": 0.5859, - "step": 177740 - }, - { - "epoch": 1.5713679520500716, - "grad_norm": 1.3267247676849365, - "learning_rate": 2.3810534132498807e-05, - "loss": 0.4893, - "step": 177750 - }, - { - "epoch": 1.5714563553103837, - "grad_norm": 1.94123375415802, - "learning_rate": 2.3809060744826936e-05, - "loss": 0.5941, - "step": 177760 - }, - { - "epoch": 1.571544758570696, - "grad_norm": 5.056488037109375, - "learning_rate": 2.3807587357155067e-05, - "loss": 0.6583, - "step": 177770 - }, - { - "epoch": 1.5716331618310084, - "grad_norm": 1.3272475004196167, - "learning_rate": 2.3806113969483196e-05, - "loss": 0.5946, - "step": 177780 - }, - { - "epoch": 1.5717215650913205, - "grad_norm": 4.6792497634887695, - "learning_rate": 2.3804640581811324e-05, - "loss": 0.6402, - "step": 177790 - }, - { - "epoch": 1.5718099683516327, - "grad_norm": 2.2302956581115723, - "learning_rate": 2.3803167194139453e-05, - "loss": 0.6164, - "step": 177800 - }, - { - "epoch": 1.571898371611945, - "grad_norm": 5.343714714050293, - "learning_rate": 2.3801693806467584e-05, - "loss": 0.7028, - "step": 177810 - }, - { - "epoch": 1.5719867748722574, - "grad_norm": 2.8348476886749268, - "learning_rate": 2.3800220418795713e-05, - "loss": 0.6744, - "step": 177820 - }, - { - "epoch": 1.5720751781325695, - "grad_norm": 3.4458703994750977, - "learning_rate": 2.379874703112384e-05, - "loss": 0.6044, - "step": 177830 - }, - { - "epoch": 1.5721635813928818, - "grad_norm": 3.3119430541992188, - "learning_rate": 2.3797273643451973e-05, - "loss": 0.6851, - "step": 177840 - }, - { - "epoch": 1.5722519846531942, - "grad_norm": 14.754148483276367, - "learning_rate": 2.37958002557801e-05, - "loss": 0.732, - "step": 177850 - }, - { - "epoch": 1.5723403879135063, - "grad_norm": 1.7317376136779785, - "learning_rate": 2.379432686810823e-05, - "loss": 0.6028, - "step": 177860 - }, - { - "epoch": 1.5724287911738184, - "grad_norm": 2.2867050170898438, - "learning_rate": 2.3792853480436358e-05, - "loss": 0.5016, - "step": 177870 - }, - { - "epoch": 1.5725171944341307, - "grad_norm": 3.202863931655884, - "learning_rate": 2.379138009276449e-05, - "loss": 0.529, - "step": 177880 - }, - { - "epoch": 1.572605597694443, - "grad_norm": 7.062049865722656, - "learning_rate": 2.3789906705092618e-05, - "loss": 0.5519, - "step": 177890 - }, - { - "epoch": 1.5726940009547552, - "grad_norm": 3.202885150909424, - "learning_rate": 2.3788433317420746e-05, - "loss": 0.7217, - "step": 177900 - }, - { - "epoch": 1.5727824042150673, - "grad_norm": 1.4440633058547974, - "learning_rate": 2.3786959929748878e-05, - "loss": 0.6615, - "step": 177910 - }, - { - "epoch": 1.5728708074753797, - "grad_norm": 3.9629647731781006, - "learning_rate": 2.3785486542077006e-05, - "loss": 0.6506, - "step": 177920 - }, - { - "epoch": 1.572959210735692, - "grad_norm": 1.7773234844207764, - "learning_rate": 2.3784013154405135e-05, - "loss": 0.6561, - "step": 177930 - }, - { - "epoch": 1.5730476139960041, - "grad_norm": 10.660492897033691, - "learning_rate": 2.3782539766733263e-05, - "loss": 0.5801, - "step": 177940 - }, - { - "epoch": 1.5731360172563165, - "grad_norm": 1.918804407119751, - "learning_rate": 2.3781066379061395e-05, - "loss": 0.6901, - "step": 177950 - }, - { - "epoch": 1.5732244205166288, - "grad_norm": 1.9095244407653809, - "learning_rate": 2.3779592991389523e-05, - "loss": 0.6185, - "step": 177960 - }, - { - "epoch": 1.573312823776941, - "grad_norm": 3.5318918228149414, - "learning_rate": 2.377811960371765e-05, - "loss": 0.5582, - "step": 177970 - }, - { - "epoch": 1.573401227037253, - "grad_norm": 0.9088351726531982, - "learning_rate": 2.377664621604578e-05, - "loss": 0.526, - "step": 177980 - }, - { - "epoch": 1.5734896302975654, - "grad_norm": 2.974895715713501, - "learning_rate": 2.377517282837391e-05, - "loss": 0.6306, - "step": 177990 - }, - { - "epoch": 1.5735780335578777, - "grad_norm": 2.7470595836639404, - "learning_rate": 2.377369944070204e-05, - "loss": 0.7147, - "step": 178000 - }, - { - "epoch": 1.5736664368181899, - "grad_norm": 6.477626323699951, - "learning_rate": 2.377222605303017e-05, - "loss": 0.6333, - "step": 178010 - }, - { - "epoch": 1.573754840078502, - "grad_norm": 0.9369567632675171, - "learning_rate": 2.37707526653583e-05, - "loss": 0.6072, - "step": 178020 - }, - { - "epoch": 1.5738432433388143, - "grad_norm": 4.91400146484375, - "learning_rate": 2.376927927768643e-05, - "loss": 0.556, - "step": 178030 - }, - { - "epoch": 1.5739316465991267, - "grad_norm": 4.29938268661499, - "learning_rate": 2.3767805890014557e-05, - "loss": 0.6233, - "step": 178040 - }, - { - "epoch": 1.5740200498594388, - "grad_norm": 8.595135688781738, - "learning_rate": 2.3766332502342685e-05, - "loss": 0.5871, - "step": 178050 - }, - { - "epoch": 1.574108453119751, - "grad_norm": 1.551999807357788, - "learning_rate": 2.3764859114670817e-05, - "loss": 0.6797, - "step": 178060 - }, - { - "epoch": 1.5741968563800635, - "grad_norm": 4.337893962860107, - "learning_rate": 2.3763385726998945e-05, - "loss": 0.569, - "step": 178070 - }, - { - "epoch": 1.5742852596403756, - "grad_norm": 1.0492699146270752, - "learning_rate": 2.3761912339327074e-05, - "loss": 0.5351, - "step": 178080 - }, - { - "epoch": 1.5743736629006877, - "grad_norm": 4.99405574798584, - "learning_rate": 2.3760438951655205e-05, - "loss": 0.5742, - "step": 178090 - }, - { - "epoch": 1.574462066161, - "grad_norm": 1.6093047857284546, - "learning_rate": 2.3758965563983334e-05, - "loss": 0.7021, - "step": 178100 - }, - { - "epoch": 1.5745504694213124, - "grad_norm": 9.563122749328613, - "learning_rate": 2.3757492176311462e-05, - "loss": 0.6464, - "step": 178110 - }, - { - "epoch": 1.5746388726816245, - "grad_norm": 4.5364508628845215, - "learning_rate": 2.375601878863959e-05, - "loss": 0.5776, - "step": 178120 - }, - { - "epoch": 1.5747272759419366, - "grad_norm": 3.5077686309814453, - "learning_rate": 2.3754545400967722e-05, - "loss": 0.5347, - "step": 178130 - }, - { - "epoch": 1.574815679202249, - "grad_norm": 10.026756286621094, - "learning_rate": 2.375307201329585e-05, - "loss": 0.6771, - "step": 178140 - }, - { - "epoch": 1.5749040824625613, - "grad_norm": 1.6047227382659912, - "learning_rate": 2.375159862562398e-05, - "loss": 0.5376, - "step": 178150 - }, - { - "epoch": 1.5749924857228734, - "grad_norm": 1.6588748693466187, - "learning_rate": 2.3750125237952107e-05, - "loss": 0.5695, - "step": 178160 - }, - { - "epoch": 1.5750808889831855, - "grad_norm": 2.7459299564361572, - "learning_rate": 2.374865185028024e-05, - "loss": 0.5176, - "step": 178170 - }, - { - "epoch": 1.5751692922434979, - "grad_norm": 4.993346214294434, - "learning_rate": 2.3747178462608367e-05, - "loss": 0.4711, - "step": 178180 - }, - { - "epoch": 1.5752576955038102, - "grad_norm": 3.7444307804107666, - "learning_rate": 2.3745705074936496e-05, - "loss": 0.4653, - "step": 178190 - }, - { - "epoch": 1.5753460987641223, - "grad_norm": 7.655012607574463, - "learning_rate": 2.3744231687264627e-05, - "loss": 0.5704, - "step": 178200 - }, - { - "epoch": 1.5754345020244347, - "grad_norm": 6.56545352935791, - "learning_rate": 2.3742758299592756e-05, - "loss": 0.6917, - "step": 178210 - }, - { - "epoch": 1.575522905284747, - "grad_norm": 1.851274013519287, - "learning_rate": 2.3741284911920887e-05, - "loss": 0.5544, - "step": 178220 - }, - { - "epoch": 1.5756113085450592, - "grad_norm": 2.2871317863464355, - "learning_rate": 2.3739811524249016e-05, - "loss": 0.4978, - "step": 178230 - }, - { - "epoch": 1.5756997118053713, - "grad_norm": 3.604883909225464, - "learning_rate": 2.3738338136577144e-05, - "loss": 0.6063, - "step": 178240 - }, - { - "epoch": 1.5757881150656836, - "grad_norm": 2.8319008350372314, - "learning_rate": 2.3736864748905276e-05, - "loss": 0.7282, - "step": 178250 - }, - { - "epoch": 1.575876518325996, - "grad_norm": 10.24183464050293, - "learning_rate": 2.3735391361233404e-05, - "loss": 0.6667, - "step": 178260 - }, - { - "epoch": 1.575964921586308, - "grad_norm": 2.5965993404388428, - "learning_rate": 2.3733917973561533e-05, - "loss": 0.5381, - "step": 178270 - }, - { - "epoch": 1.5760533248466202, - "grad_norm": 3.5896975994110107, - "learning_rate": 2.3732444585889664e-05, - "loss": 0.5891, - "step": 178280 - }, - { - "epoch": 1.5761417281069325, - "grad_norm": 6.681732177734375, - "learning_rate": 2.3730971198217793e-05, - "loss": 0.6866, - "step": 178290 - }, - { - "epoch": 1.5762301313672449, - "grad_norm": 2.867141008377075, - "learning_rate": 2.372949781054592e-05, - "loss": 0.6847, - "step": 178300 - }, - { - "epoch": 1.576318534627557, - "grad_norm": 1.4590797424316406, - "learning_rate": 2.3728024422874053e-05, - "loss": 0.5857, - "step": 178310 - }, - { - "epoch": 1.5764069378878693, - "grad_norm": 1.1472526788711548, - "learning_rate": 2.372655103520218e-05, - "loss": 0.6157, - "step": 178320 - }, - { - "epoch": 1.5764953411481817, - "grad_norm": 4.510525703430176, - "learning_rate": 2.372507764753031e-05, - "loss": 0.5637, - "step": 178330 - }, - { - "epoch": 1.5765837444084938, - "grad_norm": 6.114560604095459, - "learning_rate": 2.3723604259858438e-05, - "loss": 0.6339, - "step": 178340 - }, - { - "epoch": 1.576672147668806, - "grad_norm": 2.6327743530273438, - "learning_rate": 2.372213087218657e-05, - "loss": 0.5722, - "step": 178350 - }, - { - "epoch": 1.5767605509291183, - "grad_norm": 2.417829751968384, - "learning_rate": 2.3720657484514698e-05, - "loss": 0.6076, - "step": 178360 - }, - { - "epoch": 1.5768489541894306, - "grad_norm": 1.753642201423645, - "learning_rate": 2.3719184096842826e-05, - "loss": 0.5679, - "step": 178370 - }, - { - "epoch": 1.5769373574497427, - "grad_norm": 1.9006749391555786, - "learning_rate": 2.3717710709170958e-05, - "loss": 0.586, - "step": 178380 - }, - { - "epoch": 1.5770257607100548, - "grad_norm": 3.3040943145751953, - "learning_rate": 2.3716237321499086e-05, - "loss": 0.5879, - "step": 178390 - }, - { - "epoch": 1.5771141639703672, - "grad_norm": 2.73498272895813, - "learning_rate": 2.3714763933827215e-05, - "loss": 0.645, - "step": 178400 - }, - { - "epoch": 1.5772025672306795, - "grad_norm": 1.0006335973739624, - "learning_rate": 2.3713290546155343e-05, - "loss": 0.7305, - "step": 178410 - }, - { - "epoch": 1.5772909704909917, - "grad_norm": 2.9414331912994385, - "learning_rate": 2.3711817158483475e-05, - "loss": 0.7322, - "step": 178420 - }, - { - "epoch": 1.577379373751304, - "grad_norm": 1.0808783769607544, - "learning_rate": 2.3710343770811603e-05, - "loss": 0.5497, - "step": 178430 - }, - { - "epoch": 1.5774677770116163, - "grad_norm": 4.873090744018555, - "learning_rate": 2.370887038313973e-05, - "loss": 0.667, - "step": 178440 - }, - { - "epoch": 1.5775561802719285, - "grad_norm": 2.205275297164917, - "learning_rate": 2.370739699546786e-05, - "loss": 0.5755, - "step": 178450 - }, - { - "epoch": 1.5776445835322406, - "grad_norm": 3.3588781356811523, - "learning_rate": 2.370592360779599e-05, - "loss": 0.5621, - "step": 178460 - }, - { - "epoch": 1.577732986792553, - "grad_norm": 5.083580493927002, - "learning_rate": 2.370445022012412e-05, - "loss": 0.6306, - "step": 178470 - }, - { - "epoch": 1.5778213900528653, - "grad_norm": 4.142603874206543, - "learning_rate": 2.370297683245225e-05, - "loss": 0.5205, - "step": 178480 - }, - { - "epoch": 1.5779097933131774, - "grad_norm": 16.28163719177246, - "learning_rate": 2.370150344478038e-05, - "loss": 0.6707, - "step": 178490 - }, - { - "epoch": 1.5779981965734895, - "grad_norm": 3.596571445465088, - "learning_rate": 2.370003005710851e-05, - "loss": 0.5476, - "step": 178500 - }, - { - "epoch": 1.5780865998338018, - "grad_norm": 3.476890802383423, - "learning_rate": 2.3698556669436637e-05, - "loss": 0.5857, - "step": 178510 - }, - { - "epoch": 1.5781750030941142, - "grad_norm": 7.287332057952881, - "learning_rate": 2.3697083281764765e-05, - "loss": 0.7006, - "step": 178520 - }, - { - "epoch": 1.5782634063544263, - "grad_norm": 1.8955235481262207, - "learning_rate": 2.3695609894092897e-05, - "loss": 0.6019, - "step": 178530 - }, - { - "epoch": 1.5783518096147386, - "grad_norm": 1.8529415130615234, - "learning_rate": 2.3694136506421025e-05, - "loss": 0.6293, - "step": 178540 - }, - { - "epoch": 1.578440212875051, - "grad_norm": 11.360862731933594, - "learning_rate": 2.3692663118749154e-05, - "loss": 0.6061, - "step": 178550 - }, - { - "epoch": 1.578528616135363, - "grad_norm": 4.6779327392578125, - "learning_rate": 2.3691189731077285e-05, - "loss": 0.6599, - "step": 178560 - }, - { - "epoch": 1.5786170193956752, - "grad_norm": 1.919928789138794, - "learning_rate": 2.3689716343405414e-05, - "loss": 0.5257, - "step": 178570 - }, - { - "epoch": 1.5787054226559876, - "grad_norm": 4.567292213439941, - "learning_rate": 2.3688242955733542e-05, - "loss": 0.647, - "step": 178580 - }, - { - "epoch": 1.5787938259163, - "grad_norm": 2.994666814804077, - "learning_rate": 2.368676956806167e-05, - "loss": 0.5878, - "step": 178590 - }, - { - "epoch": 1.578882229176612, - "grad_norm": 2.907742738723755, - "learning_rate": 2.3685296180389802e-05, - "loss": 0.4936, - "step": 178600 - }, - { - "epoch": 1.5789706324369241, - "grad_norm": 5.553699970245361, - "learning_rate": 2.368382279271793e-05, - "loss": 0.5572, - "step": 178610 - }, - { - "epoch": 1.5790590356972365, - "grad_norm": 1.1139589548110962, - "learning_rate": 2.368234940504606e-05, - "loss": 0.4497, - "step": 178620 - }, - { - "epoch": 1.5791474389575488, - "grad_norm": 3.8644986152648926, - "learning_rate": 2.3680876017374187e-05, - "loss": 0.6286, - "step": 178630 - }, - { - "epoch": 1.579235842217861, - "grad_norm": 2.879363775253296, - "learning_rate": 2.367940262970232e-05, - "loss": 0.5567, - "step": 178640 - }, - { - "epoch": 1.579324245478173, - "grad_norm": 4.482773303985596, - "learning_rate": 2.3677929242030447e-05, - "loss": 0.5552, - "step": 178650 - }, - { - "epoch": 1.5794126487384856, - "grad_norm": 2.0084800720214844, - "learning_rate": 2.3676455854358576e-05, - "loss": 0.6186, - "step": 178660 - }, - { - "epoch": 1.5795010519987978, - "grad_norm": 1.4457485675811768, - "learning_rate": 2.3674982466686707e-05, - "loss": 0.5787, - "step": 178670 - }, - { - "epoch": 1.5795894552591099, - "grad_norm": 2.304863691329956, - "learning_rate": 2.3673509079014836e-05, - "loss": 0.5726, - "step": 178680 - }, - { - "epoch": 1.5796778585194222, - "grad_norm": 3.500981330871582, - "learning_rate": 2.3672035691342964e-05, - "loss": 0.5061, - "step": 178690 - }, - { - "epoch": 1.5797662617797346, - "grad_norm": 2.110438823699951, - "learning_rate": 2.3670562303671093e-05, - "loss": 0.6363, - "step": 178700 - }, - { - "epoch": 1.5798546650400467, - "grad_norm": 1.9318902492523193, - "learning_rate": 2.3669088915999224e-05, - "loss": 0.5963, - "step": 178710 - }, - { - "epoch": 1.5799430683003588, - "grad_norm": 3.0371882915496826, - "learning_rate": 2.3667615528327353e-05, - "loss": 0.5753, - "step": 178720 - }, - { - "epoch": 1.5800314715606711, - "grad_norm": 2.086472749710083, - "learning_rate": 2.366614214065548e-05, - "loss": 0.7206, - "step": 178730 - }, - { - "epoch": 1.5801198748209835, - "grad_norm": 3.5821118354797363, - "learning_rate": 2.3664668752983613e-05, - "loss": 0.4912, - "step": 178740 - }, - { - "epoch": 1.5802082780812956, - "grad_norm": 1.779372215270996, - "learning_rate": 2.366319536531174e-05, - "loss": 0.5519, - "step": 178750 - }, - { - "epoch": 1.5802966813416077, - "grad_norm": 1.6509251594543457, - "learning_rate": 2.366172197763987e-05, - "loss": 0.5721, - "step": 178760 - }, - { - "epoch": 1.58038508460192, - "grad_norm": 1.9885419607162476, - "learning_rate": 2.3660248589967998e-05, - "loss": 0.5759, - "step": 178770 - }, - { - "epoch": 1.5804734878622324, - "grad_norm": 1.6160720586776733, - "learning_rate": 2.365877520229613e-05, - "loss": 0.49, - "step": 178780 - }, - { - "epoch": 1.5805618911225445, - "grad_norm": 13.939750671386719, - "learning_rate": 2.3657301814624258e-05, - "loss": 0.667, - "step": 178790 - }, - { - "epoch": 1.5806502943828569, - "grad_norm": 1.304701328277588, - "learning_rate": 2.3655828426952386e-05, - "loss": 0.6548, - "step": 178800 - }, - { - "epoch": 1.5807386976431692, - "grad_norm": 1.8542201519012451, - "learning_rate": 2.3654355039280515e-05, - "loss": 0.6059, - "step": 178810 - }, - { - "epoch": 1.5808271009034813, - "grad_norm": 3.677777051925659, - "learning_rate": 2.3652881651608646e-05, - "loss": 0.5655, - "step": 178820 - }, - { - "epoch": 1.5809155041637935, - "grad_norm": 5.935771942138672, - "learning_rate": 2.3651408263936775e-05, - "loss": 0.6363, - "step": 178830 - }, - { - "epoch": 1.5810039074241058, - "grad_norm": 2.1556954383850098, - "learning_rate": 2.3649934876264903e-05, - "loss": 0.6915, - "step": 178840 - }, - { - "epoch": 1.5810923106844181, - "grad_norm": 1.132177710533142, - "learning_rate": 2.3648461488593035e-05, - "loss": 0.4895, - "step": 178850 - }, - { - "epoch": 1.5811807139447303, - "grad_norm": 2.6694698333740234, - "learning_rate": 2.3646988100921163e-05, - "loss": 0.5707, - "step": 178860 - }, - { - "epoch": 1.5812691172050424, - "grad_norm": 3.039940595626831, - "learning_rate": 2.364551471324929e-05, - "loss": 0.703, - "step": 178870 - }, - { - "epoch": 1.5813575204653547, - "grad_norm": 1.4182237386703491, - "learning_rate": 2.364404132557742e-05, - "loss": 0.6468, - "step": 178880 - }, - { - "epoch": 1.581445923725667, - "grad_norm": 1.8856532573699951, - "learning_rate": 2.364256793790555e-05, - "loss": 0.7067, - "step": 178890 - }, - { - "epoch": 1.5815343269859792, - "grad_norm": 0.8956825733184814, - "learning_rate": 2.364109455023368e-05, - "loss": 0.4886, - "step": 178900 - }, - { - "epoch": 1.5816227302462915, - "grad_norm": 2.1884191036224365, - "learning_rate": 2.3639621162561808e-05, - "loss": 0.5318, - "step": 178910 - }, - { - "epoch": 1.5817111335066039, - "grad_norm": 6.482180595397949, - "learning_rate": 2.3638147774889937e-05, - "loss": 0.6169, - "step": 178920 - }, - { - "epoch": 1.581799536766916, - "grad_norm": 2.9183382987976074, - "learning_rate": 2.363667438721807e-05, - "loss": 0.5812, - "step": 178930 - }, - { - "epoch": 1.581887940027228, - "grad_norm": 2.4691333770751953, - "learning_rate": 2.3635200999546197e-05, - "loss": 0.7067, - "step": 178940 - }, - { - "epoch": 1.5819763432875404, - "grad_norm": 4.402538299560547, - "learning_rate": 2.3633727611874325e-05, - "loss": 0.5928, - "step": 178950 - }, - { - "epoch": 1.5820647465478528, - "grad_norm": 7.086040496826172, - "learning_rate": 2.3632254224202457e-05, - "loss": 0.5833, - "step": 178960 - }, - { - "epoch": 1.582153149808165, - "grad_norm": 5.071657180786133, - "learning_rate": 2.3630780836530585e-05, - "loss": 0.7063, - "step": 178970 - }, - { - "epoch": 1.582241553068477, - "grad_norm": 0.8515512943267822, - "learning_rate": 2.3629307448858714e-05, - "loss": 0.631, - "step": 178980 - }, - { - "epoch": 1.5823299563287894, - "grad_norm": 2.0974810123443604, - "learning_rate": 2.3627834061186842e-05, - "loss": 0.5044, - "step": 178990 - }, - { - "epoch": 1.5824183595891017, - "grad_norm": 0.7980273365974426, - "learning_rate": 2.3626360673514974e-05, - "loss": 0.5441, - "step": 179000 - }, - { - "epoch": 1.5825067628494138, - "grad_norm": 5.945720672607422, - "learning_rate": 2.3624887285843102e-05, - "loss": 0.6803, - "step": 179010 - }, - { - "epoch": 1.5825951661097262, - "grad_norm": 4.092936992645264, - "learning_rate": 2.362341389817123e-05, - "loss": 0.5958, - "step": 179020 - }, - { - "epoch": 1.5826835693700385, - "grad_norm": 1.810025691986084, - "learning_rate": 2.3621940510499362e-05, - "loss": 0.6582, - "step": 179030 - }, - { - "epoch": 1.5827719726303506, - "grad_norm": 6.428379058837891, - "learning_rate": 2.362046712282749e-05, - "loss": 0.5424, - "step": 179040 - }, - { - "epoch": 1.5828603758906628, - "grad_norm": 9.304973602294922, - "learning_rate": 2.361899373515562e-05, - "loss": 0.5468, - "step": 179050 - }, - { - "epoch": 1.582948779150975, - "grad_norm": 2.233605146408081, - "learning_rate": 2.3617520347483747e-05, - "loss": 0.5723, - "step": 179060 - }, - { - "epoch": 1.5830371824112874, - "grad_norm": 4.325331211090088, - "learning_rate": 2.361604695981188e-05, - "loss": 0.5641, - "step": 179070 - }, - { - "epoch": 1.5831255856715996, - "grad_norm": 2.376910448074341, - "learning_rate": 2.3614573572140007e-05, - "loss": 0.5781, - "step": 179080 - }, - { - "epoch": 1.5832139889319117, - "grad_norm": 1.9221725463867188, - "learning_rate": 2.3613100184468136e-05, - "loss": 0.6365, - "step": 179090 - }, - { - "epoch": 1.583302392192224, - "grad_norm": 2.345121145248413, - "learning_rate": 2.3611626796796267e-05, - "loss": 0.6133, - "step": 179100 - }, - { - "epoch": 1.5833907954525364, - "grad_norm": 8.477579116821289, - "learning_rate": 2.3610153409124396e-05, - "loss": 0.5823, - "step": 179110 - }, - { - "epoch": 1.5834791987128485, - "grad_norm": 1.4636448621749878, - "learning_rate": 2.3608680021452524e-05, - "loss": 0.552, - "step": 179120 - }, - { - "epoch": 1.5835676019731608, - "grad_norm": 2.6343884468078613, - "learning_rate": 2.3607206633780656e-05, - "loss": 0.6246, - "step": 179130 - }, - { - "epoch": 1.5836560052334732, - "grad_norm": 4.477362155914307, - "learning_rate": 2.3605733246108784e-05, - "loss": 0.5574, - "step": 179140 - }, - { - "epoch": 1.5837444084937853, - "grad_norm": 1.9163240194320679, - "learning_rate": 2.3604259858436912e-05, - "loss": 0.6059, - "step": 179150 - }, - { - "epoch": 1.5838328117540974, - "grad_norm": 1.4452087879180908, - "learning_rate": 2.3602786470765044e-05, - "loss": 0.5309, - "step": 179160 - }, - { - "epoch": 1.5839212150144097, - "grad_norm": 2.4761500358581543, - "learning_rate": 2.3601313083093173e-05, - "loss": 0.6122, - "step": 179170 - }, - { - "epoch": 1.584009618274722, - "grad_norm": 2.7357590198516846, - "learning_rate": 2.35998396954213e-05, - "loss": 0.5505, - "step": 179180 - }, - { - "epoch": 1.5840980215350342, - "grad_norm": 1.9816958904266357, - "learning_rate": 2.3598366307749433e-05, - "loss": 0.4913, - "step": 179190 - }, - { - "epoch": 1.5841864247953463, - "grad_norm": 6.363073825836182, - "learning_rate": 2.359689292007756e-05, - "loss": 0.5269, - "step": 179200 - }, - { - "epoch": 1.5842748280556587, - "grad_norm": 4.007833957672119, - "learning_rate": 2.359541953240569e-05, - "loss": 0.608, - "step": 179210 - }, - { - "epoch": 1.584363231315971, - "grad_norm": 1.6797919273376465, - "learning_rate": 2.359394614473382e-05, - "loss": 0.4743, - "step": 179220 - }, - { - "epoch": 1.5844516345762831, - "grad_norm": 6.640074729919434, - "learning_rate": 2.359247275706195e-05, - "loss": 0.6119, - "step": 179230 - }, - { - "epoch": 1.5845400378365952, - "grad_norm": 2.887671947479248, - "learning_rate": 2.3590999369390078e-05, - "loss": 0.8554, - "step": 179240 - }, - { - "epoch": 1.5846284410969078, - "grad_norm": 7.402940273284912, - "learning_rate": 2.358952598171821e-05, - "loss": 0.6546, - "step": 179250 - }, - { - "epoch": 1.58471684435722, - "grad_norm": 1.115099549293518, - "learning_rate": 2.3588052594046338e-05, - "loss": 0.5681, - "step": 179260 - }, - { - "epoch": 1.584805247617532, - "grad_norm": 7.120483875274658, - "learning_rate": 2.3586579206374466e-05, - "loss": 0.6008, - "step": 179270 - }, - { - "epoch": 1.5848936508778444, - "grad_norm": 3.989267587661743, - "learning_rate": 2.3585105818702595e-05, - "loss": 0.6401, - "step": 179280 - }, - { - "epoch": 1.5849820541381567, - "grad_norm": 6.123172760009766, - "learning_rate": 2.3583632431030726e-05, - "loss": 0.6082, - "step": 179290 - }, - { - "epoch": 1.5850704573984689, - "grad_norm": 3.8420369625091553, - "learning_rate": 2.3582159043358855e-05, - "loss": 0.556, - "step": 179300 - }, - { - "epoch": 1.585158860658781, - "grad_norm": 5.0978684425354, - "learning_rate": 2.3580685655686983e-05, - "loss": 0.5908, - "step": 179310 - }, - { - "epoch": 1.5852472639190933, - "grad_norm": 2.9666075706481934, - "learning_rate": 2.3579212268015115e-05, - "loss": 0.5986, - "step": 179320 - }, - { - "epoch": 1.5853356671794057, - "grad_norm": 3.3843584060668945, - "learning_rate": 2.3577738880343243e-05, - "loss": 0.6425, - "step": 179330 - }, - { - "epoch": 1.5854240704397178, - "grad_norm": 9.74099063873291, - "learning_rate": 2.357626549267137e-05, - "loss": 0.5784, - "step": 179340 - }, - { - "epoch": 1.58551247370003, - "grad_norm": 2.8699123859405518, - "learning_rate": 2.35747921049995e-05, - "loss": 0.5442, - "step": 179350 - }, - { - "epoch": 1.5856008769603422, - "grad_norm": 2.280367374420166, - "learning_rate": 2.357331871732763e-05, - "loss": 0.6257, - "step": 179360 - }, - { - "epoch": 1.5856892802206546, - "grad_norm": 4.121230125427246, - "learning_rate": 2.357184532965576e-05, - "loss": 0.5657, - "step": 179370 - }, - { - "epoch": 1.5857776834809667, - "grad_norm": 3.2622361183166504, - "learning_rate": 2.357037194198389e-05, - "loss": 0.5927, - "step": 179380 - }, - { - "epoch": 1.585866086741279, - "grad_norm": 1.4865586757659912, - "learning_rate": 2.356889855431202e-05, - "loss": 0.6538, - "step": 179390 - }, - { - "epoch": 1.5859544900015914, - "grad_norm": 2.511223793029785, - "learning_rate": 2.356742516664015e-05, - "loss": 0.6043, - "step": 179400 - }, - { - "epoch": 1.5860428932619035, - "grad_norm": 9.320632934570312, - "learning_rate": 2.3565951778968277e-05, - "loss": 0.4915, - "step": 179410 - }, - { - "epoch": 1.5861312965222156, - "grad_norm": 7.399755954742432, - "learning_rate": 2.3564478391296405e-05, - "loss": 0.6575, - "step": 179420 - }, - { - "epoch": 1.586219699782528, - "grad_norm": 6.180196285247803, - "learning_rate": 2.3563005003624537e-05, - "loss": 0.6244, - "step": 179430 - }, - { - "epoch": 1.5863081030428403, - "grad_norm": 2.650404930114746, - "learning_rate": 2.3561531615952665e-05, - "loss": 0.5942, - "step": 179440 - }, - { - "epoch": 1.5863965063031524, - "grad_norm": 2.940699577331543, - "learning_rate": 2.3560058228280794e-05, - "loss": 0.4877, - "step": 179450 - }, - { - "epoch": 1.5864849095634646, - "grad_norm": 3.089730739593506, - "learning_rate": 2.3558584840608922e-05, - "loss": 0.6801, - "step": 179460 - }, - { - "epoch": 1.586573312823777, - "grad_norm": 2.5377092361450195, - "learning_rate": 2.3557111452937054e-05, - "loss": 0.5812, - "step": 179470 - }, - { - "epoch": 1.5866617160840892, - "grad_norm": 3.2368810176849365, - "learning_rate": 2.3555638065265182e-05, - "loss": 0.5587, - "step": 179480 - }, - { - "epoch": 1.5867501193444014, - "grad_norm": 8.294109344482422, - "learning_rate": 2.355416467759331e-05, - "loss": 0.5886, - "step": 179490 - }, - { - "epoch": 1.5868385226047137, - "grad_norm": 1.0442575216293335, - "learning_rate": 2.3552691289921442e-05, - "loss": 0.589, - "step": 179500 - }, - { - "epoch": 1.586926925865026, - "grad_norm": 2.7338919639587402, - "learning_rate": 2.355121790224957e-05, - "loss": 0.7372, - "step": 179510 - }, - { - "epoch": 1.5870153291253382, - "grad_norm": 1.5340664386749268, - "learning_rate": 2.35497445145777e-05, - "loss": 0.5771, - "step": 179520 - }, - { - "epoch": 1.5871037323856503, - "grad_norm": 1.4046498537063599, - "learning_rate": 2.3548271126905827e-05, - "loss": 0.4419, - "step": 179530 - }, - { - "epoch": 1.5871921356459626, - "grad_norm": 0.923066258430481, - "learning_rate": 2.354679773923396e-05, - "loss": 0.6282, - "step": 179540 - }, - { - "epoch": 1.587280538906275, - "grad_norm": 7.069698810577393, - "learning_rate": 2.3545324351562087e-05, - "loss": 0.5107, - "step": 179550 - }, - { - "epoch": 1.587368942166587, - "grad_norm": 9.05636978149414, - "learning_rate": 2.3543850963890216e-05, - "loss": 0.6176, - "step": 179560 - }, - { - "epoch": 1.5874573454268992, - "grad_norm": 3.305478572845459, - "learning_rate": 2.3542377576218344e-05, - "loss": 0.6397, - "step": 179570 - }, - { - "epoch": 1.5875457486872115, - "grad_norm": 3.001066207885742, - "learning_rate": 2.3540904188546476e-05, - "loss": 0.5991, - "step": 179580 - }, - { - "epoch": 1.5876341519475239, - "grad_norm": 1.7910176515579224, - "learning_rate": 2.3539430800874604e-05, - "loss": 0.4692, - "step": 179590 - }, - { - "epoch": 1.587722555207836, - "grad_norm": 3.8087406158447266, - "learning_rate": 2.3537957413202732e-05, - "loss": 0.6447, - "step": 179600 - }, - { - "epoch": 1.5878109584681483, - "grad_norm": 2.3797008991241455, - "learning_rate": 2.3536484025530864e-05, - "loss": 0.5771, - "step": 179610 - }, - { - "epoch": 1.5878993617284607, - "grad_norm": 1.7294301986694336, - "learning_rate": 2.3535010637858993e-05, - "loss": 0.6754, - "step": 179620 - }, - { - "epoch": 1.5879877649887728, - "grad_norm": 3.0239651203155518, - "learning_rate": 2.353353725018712e-05, - "loss": 0.7199, - "step": 179630 - }, - { - "epoch": 1.588076168249085, - "grad_norm": 2.4570260047912598, - "learning_rate": 2.353206386251525e-05, - "loss": 0.5504, - "step": 179640 - }, - { - "epoch": 1.5881645715093973, - "grad_norm": 3.3867244720458984, - "learning_rate": 2.353059047484338e-05, - "loss": 0.5894, - "step": 179650 - }, - { - "epoch": 1.5882529747697096, - "grad_norm": 2.0542361736297607, - "learning_rate": 2.352911708717151e-05, - "loss": 0.3638, - "step": 179660 - }, - { - "epoch": 1.5883413780300217, - "grad_norm": 3.210259437561035, - "learning_rate": 2.3527643699499638e-05, - "loss": 0.556, - "step": 179670 - }, - { - "epoch": 1.5884297812903339, - "grad_norm": 1.8866873979568481, - "learning_rate": 2.352617031182777e-05, - "loss": 0.5619, - "step": 179680 - }, - { - "epoch": 1.5885181845506462, - "grad_norm": 3.407763957977295, - "learning_rate": 2.3524696924155898e-05, - "loss": 0.6893, - "step": 179690 - }, - { - "epoch": 1.5886065878109585, - "grad_norm": 2.2238566875457764, - "learning_rate": 2.3523223536484026e-05, - "loss": 0.5486, - "step": 179700 - }, - { - "epoch": 1.5886949910712707, - "grad_norm": 2.908684730529785, - "learning_rate": 2.3521750148812155e-05, - "loss": 0.5935, - "step": 179710 - }, - { - "epoch": 1.588783394331583, - "grad_norm": 2.056279182434082, - "learning_rate": 2.3520276761140286e-05, - "loss": 0.5224, - "step": 179720 - }, - { - "epoch": 1.5888717975918953, - "grad_norm": 1.5335217714309692, - "learning_rate": 2.3518803373468415e-05, - "loss": 0.5014, - "step": 179730 - }, - { - "epoch": 1.5889602008522075, - "grad_norm": 1.1683319807052612, - "learning_rate": 2.3517329985796543e-05, - "loss": 0.6044, - "step": 179740 - }, - { - "epoch": 1.5890486041125196, - "grad_norm": 3.7608189582824707, - "learning_rate": 2.351585659812467e-05, - "loss": 0.6802, - "step": 179750 - }, - { - "epoch": 1.589137007372832, - "grad_norm": 2.6810014247894287, - "learning_rate": 2.3514383210452803e-05, - "loss": 0.6324, - "step": 179760 - }, - { - "epoch": 1.5892254106331443, - "grad_norm": 2.0969247817993164, - "learning_rate": 2.351290982278093e-05, - "loss": 0.5661, - "step": 179770 - }, - { - "epoch": 1.5893138138934564, - "grad_norm": 2.7428903579711914, - "learning_rate": 2.351143643510906e-05, - "loss": 0.5117, - "step": 179780 - }, - { - "epoch": 1.5894022171537685, - "grad_norm": 2.2435295581817627, - "learning_rate": 2.350996304743719e-05, - "loss": 0.4758, - "step": 179790 - }, - { - "epoch": 1.5894906204140808, - "grad_norm": 2.1977460384368896, - "learning_rate": 2.350848965976532e-05, - "loss": 0.6348, - "step": 179800 - }, - { - "epoch": 1.5895790236743932, - "grad_norm": 7.006791114807129, - "learning_rate": 2.3507016272093448e-05, - "loss": 0.5691, - "step": 179810 - }, - { - "epoch": 1.5896674269347053, - "grad_norm": 4.33966064453125, - "learning_rate": 2.3505542884421577e-05, - "loss": 0.6901, - "step": 179820 - }, - { - "epoch": 1.5897558301950174, - "grad_norm": 1.4744582176208496, - "learning_rate": 2.350406949674971e-05, - "loss": 0.5036, - "step": 179830 - }, - { - "epoch": 1.58984423345533, - "grad_norm": 1.8755768537521362, - "learning_rate": 2.3502596109077837e-05, - "loss": 0.6141, - "step": 179840 - }, - { - "epoch": 1.5899326367156421, - "grad_norm": 3.1658451557159424, - "learning_rate": 2.3501122721405965e-05, - "loss": 0.6172, - "step": 179850 - }, - { - "epoch": 1.5900210399759542, - "grad_norm": 2.5658719539642334, - "learning_rate": 2.3499649333734097e-05, - "loss": 0.6669, - "step": 179860 - }, - { - "epoch": 1.5901094432362666, - "grad_norm": 3.693065643310547, - "learning_rate": 2.3498175946062225e-05, - "loss": 0.6059, - "step": 179870 - }, - { - "epoch": 1.590197846496579, - "grad_norm": 2.051083564758301, - "learning_rate": 2.3496702558390353e-05, - "loss": 0.7024, - "step": 179880 - }, - { - "epoch": 1.590286249756891, - "grad_norm": 3.083063840866089, - "learning_rate": 2.3495229170718482e-05, - "loss": 0.5307, - "step": 179890 - }, - { - "epoch": 1.5903746530172032, - "grad_norm": 4.3811235427856445, - "learning_rate": 2.3493755783046614e-05, - "loss": 0.5674, - "step": 179900 - }, - { - "epoch": 1.5904630562775155, - "grad_norm": 1.8005938529968262, - "learning_rate": 2.3492282395374742e-05, - "loss": 0.5071, - "step": 179910 - }, - { - "epoch": 1.5905514595378278, - "grad_norm": 3.3282253742218018, - "learning_rate": 2.349080900770287e-05, - "loss": 0.6421, - "step": 179920 - }, - { - "epoch": 1.59063986279814, - "grad_norm": 3.116586208343506, - "learning_rate": 2.3489335620031e-05, - "loss": 0.6063, - "step": 179930 - }, - { - "epoch": 1.590728266058452, - "grad_norm": 2.2537944316864014, - "learning_rate": 2.348786223235913e-05, - "loss": 0.5991, - "step": 179940 - }, - { - "epoch": 1.5908166693187646, - "grad_norm": 13.848723411560059, - "learning_rate": 2.348638884468726e-05, - "loss": 0.6208, - "step": 179950 - }, - { - "epoch": 1.5909050725790768, - "grad_norm": 3.3459393978118896, - "learning_rate": 2.3484915457015387e-05, - "loss": 0.6962, - "step": 179960 - }, - { - "epoch": 1.5909934758393889, - "grad_norm": 2.123842716217041, - "learning_rate": 2.348344206934352e-05, - "loss": 0.5256, - "step": 179970 - }, - { - "epoch": 1.5910818790997012, - "grad_norm": 4.606895923614502, - "learning_rate": 2.3481968681671647e-05, - "loss": 0.7916, - "step": 179980 - }, - { - "epoch": 1.5911702823600136, - "grad_norm": 12.328638076782227, - "learning_rate": 2.3480495293999776e-05, - "loss": 0.6393, - "step": 179990 - }, - { - "epoch": 1.5912586856203257, - "grad_norm": 3.4472873210906982, - "learning_rate": 2.3479021906327904e-05, - "loss": 0.5403, - "step": 180000 - }, - { - "epoch": 1.5913470888806378, - "grad_norm": 0.8657683730125427, - "learning_rate": 2.3477548518656036e-05, - "loss": 0.5665, - "step": 180010 - }, - { - "epoch": 1.5914354921409501, - "grad_norm": 1.5501017570495605, - "learning_rate": 2.3476075130984164e-05, - "loss": 0.6192, - "step": 180020 - }, - { - "epoch": 1.5915238954012625, - "grad_norm": 1.8315023183822632, - "learning_rate": 2.3474601743312292e-05, - "loss": 0.6643, - "step": 180030 - }, - { - "epoch": 1.5916122986615746, - "grad_norm": 4.753173351287842, - "learning_rate": 2.3473128355640424e-05, - "loss": 0.6156, - "step": 180040 - }, - { - "epoch": 1.5917007019218867, - "grad_norm": 2.014275312423706, - "learning_rate": 2.3471654967968552e-05, - "loss": 0.6332, - "step": 180050 - }, - { - "epoch": 1.591789105182199, - "grad_norm": 1.5966054201126099, - "learning_rate": 2.347018158029668e-05, - "loss": 0.5239, - "step": 180060 - }, - { - "epoch": 1.5918775084425114, - "grad_norm": 8.995635032653809, - "learning_rate": 2.3468708192624813e-05, - "loss": 0.5499, - "step": 180070 - }, - { - "epoch": 1.5919659117028235, - "grad_norm": 5.3437628746032715, - "learning_rate": 2.346723480495294e-05, - "loss": 0.5464, - "step": 180080 - }, - { - "epoch": 1.5920543149631359, - "grad_norm": 3.6786534786224365, - "learning_rate": 2.346576141728107e-05, - "loss": 0.6158, - "step": 180090 - }, - { - "epoch": 1.5921427182234482, - "grad_norm": 2.0477163791656494, - "learning_rate": 2.34642880296092e-05, - "loss": 0.5796, - "step": 180100 - }, - { - "epoch": 1.5922311214837603, - "grad_norm": 1.4329932928085327, - "learning_rate": 2.346281464193733e-05, - "loss": 0.5692, - "step": 180110 - }, - { - "epoch": 1.5923195247440725, - "grad_norm": 1.3678836822509766, - "learning_rate": 2.3461341254265458e-05, - "loss": 0.6621, - "step": 180120 - }, - { - "epoch": 1.5924079280043848, - "grad_norm": 0.872795820236206, - "learning_rate": 2.345986786659359e-05, - "loss": 0.5538, - "step": 180130 - }, - { - "epoch": 1.5924963312646971, - "grad_norm": 1.3687347173690796, - "learning_rate": 2.3458394478921718e-05, - "loss": 0.5979, - "step": 180140 - }, - { - "epoch": 1.5925847345250093, - "grad_norm": 5.690780162811279, - "learning_rate": 2.3456921091249846e-05, - "loss": 0.6672, - "step": 180150 - }, - { - "epoch": 1.5926731377853214, - "grad_norm": 1.6220439672470093, - "learning_rate": 2.3455447703577978e-05, - "loss": 0.6538, - "step": 180160 - }, - { - "epoch": 1.5927615410456337, - "grad_norm": 5.372829437255859, - "learning_rate": 2.3453974315906106e-05, - "loss": 0.5669, - "step": 180170 - }, - { - "epoch": 1.592849944305946, - "grad_norm": 3.263195514678955, - "learning_rate": 2.3452500928234235e-05, - "loss": 0.5365, - "step": 180180 - }, - { - "epoch": 1.5929383475662582, - "grad_norm": 4.150403022766113, - "learning_rate": 2.3451027540562366e-05, - "loss": 0.622, - "step": 180190 - }, - { - "epoch": 1.5930267508265705, - "grad_norm": 2.364734411239624, - "learning_rate": 2.3449554152890495e-05, - "loss": 0.6447, - "step": 180200 - }, - { - "epoch": 1.5931151540868829, - "grad_norm": 3.08217453956604, - "learning_rate": 2.3448080765218623e-05, - "loss": 0.6072, - "step": 180210 - }, - { - "epoch": 1.593203557347195, - "grad_norm": 2.8130154609680176, - "learning_rate": 2.344660737754675e-05, - "loss": 0.5749, - "step": 180220 - }, - { - "epoch": 1.593291960607507, - "grad_norm": 61.3019905090332, - "learning_rate": 2.3445133989874883e-05, - "loss": 0.5995, - "step": 180230 - }, - { - "epoch": 1.5933803638678194, - "grad_norm": 6.596480846405029, - "learning_rate": 2.344366060220301e-05, - "loss": 0.7515, - "step": 180240 - }, - { - "epoch": 1.5934687671281318, - "grad_norm": 2.65738844871521, - "learning_rate": 2.344218721453114e-05, - "loss": 0.731, - "step": 180250 - }, - { - "epoch": 1.593557170388444, - "grad_norm": 5.716721534729004, - "learning_rate": 2.344071382685927e-05, - "loss": 0.6749, - "step": 180260 - }, - { - "epoch": 1.593645573648756, - "grad_norm": 2.220371723175049, - "learning_rate": 2.34392404391874e-05, - "loss": 0.535, - "step": 180270 - }, - { - "epoch": 1.5937339769090684, - "grad_norm": 5.855672836303711, - "learning_rate": 2.3437767051515528e-05, - "loss": 0.7136, - "step": 180280 - }, - { - "epoch": 1.5938223801693807, - "grad_norm": 1.289474368095398, - "learning_rate": 2.3436293663843657e-05, - "loss": 0.5728, - "step": 180290 - }, - { - "epoch": 1.5939107834296928, - "grad_norm": 2.866244077682495, - "learning_rate": 2.343482027617179e-05, - "loss": 0.7036, - "step": 180300 - }, - { - "epoch": 1.5939991866900052, - "grad_norm": 2.3711483478546143, - "learning_rate": 2.3433346888499917e-05, - "loss": 0.6784, - "step": 180310 - }, - { - "epoch": 1.5940875899503175, - "grad_norm": 5.846799373626709, - "learning_rate": 2.3431873500828045e-05, - "loss": 0.6402, - "step": 180320 - }, - { - "epoch": 1.5941759932106296, - "grad_norm": 2.150857448577881, - "learning_rate": 2.3430400113156177e-05, - "loss": 0.6215, - "step": 180330 - }, - { - "epoch": 1.5942643964709418, - "grad_norm": 2.2093544006347656, - "learning_rate": 2.3428926725484305e-05, - "loss": 0.5766, - "step": 180340 - }, - { - "epoch": 1.594352799731254, - "grad_norm": 3.9267990589141846, - "learning_rate": 2.3427453337812434e-05, - "loss": 0.587, - "step": 180350 - }, - { - "epoch": 1.5944412029915664, - "grad_norm": 2.444490432739258, - "learning_rate": 2.3425979950140562e-05, - "loss": 0.5162, - "step": 180360 - }, - { - "epoch": 1.5945296062518786, - "grad_norm": 1.5033422708511353, - "learning_rate": 2.3424506562468694e-05, - "loss": 0.653, - "step": 180370 - }, - { - "epoch": 1.5946180095121907, - "grad_norm": 2.6062347888946533, - "learning_rate": 2.3423033174796822e-05, - "loss": 0.7283, - "step": 180380 - }, - { - "epoch": 1.594706412772503, - "grad_norm": 1.7430806159973145, - "learning_rate": 2.342155978712495e-05, - "loss": 0.6121, - "step": 180390 - }, - { - "epoch": 1.5947948160328154, - "grad_norm": 7.023827075958252, - "learning_rate": 2.342008639945308e-05, - "loss": 0.6218, - "step": 180400 - }, - { - "epoch": 1.5948832192931275, - "grad_norm": 6.906120300292969, - "learning_rate": 2.341861301178121e-05, - "loss": 0.6109, - "step": 180410 - }, - { - "epoch": 1.5949716225534396, - "grad_norm": 1.745193600654602, - "learning_rate": 2.341713962410934e-05, - "loss": 0.6717, - "step": 180420 - }, - { - "epoch": 1.5950600258137522, - "grad_norm": 2.645542860031128, - "learning_rate": 2.3415666236437467e-05, - "loss": 0.6428, - "step": 180430 - }, - { - "epoch": 1.5951484290740643, - "grad_norm": 2.2267887592315674, - "learning_rate": 2.34141928487656e-05, - "loss": 0.4378, - "step": 180440 - }, - { - "epoch": 1.5952368323343764, - "grad_norm": 2.023374319076538, - "learning_rate": 2.3412719461093727e-05, - "loss": 0.709, - "step": 180450 - }, - { - "epoch": 1.5953252355946888, - "grad_norm": 1.7647895812988281, - "learning_rate": 2.3411246073421856e-05, - "loss": 0.5173, - "step": 180460 - }, - { - "epoch": 1.595413638855001, - "grad_norm": 5.248888969421387, - "learning_rate": 2.3409772685749984e-05, - "loss": 0.7386, - "step": 180470 - }, - { - "epoch": 1.5955020421153132, - "grad_norm": 2.8855655193328857, - "learning_rate": 2.3408299298078116e-05, - "loss": 0.7359, - "step": 180480 - }, - { - "epoch": 1.5955904453756253, - "grad_norm": 3.3585245609283447, - "learning_rate": 2.3406825910406244e-05, - "loss": 0.5645, - "step": 180490 - }, - { - "epoch": 1.5956788486359377, - "grad_norm": 1.4526543617248535, - "learning_rate": 2.3405352522734372e-05, - "loss": 0.5595, - "step": 180500 - }, - { - "epoch": 1.59576725189625, - "grad_norm": 4.439969062805176, - "learning_rate": 2.34038791350625e-05, - "loss": 0.5334, - "step": 180510 - }, - { - "epoch": 1.5958556551565621, - "grad_norm": 1.8360610008239746, - "learning_rate": 2.3402405747390632e-05, - "loss": 0.6466, - "step": 180520 - }, - { - "epoch": 1.5959440584168743, - "grad_norm": 1.8138060569763184, - "learning_rate": 2.340093235971876e-05, - "loss": 0.8035, - "step": 180530 - }, - { - "epoch": 1.5960324616771868, - "grad_norm": 2.0265145301818848, - "learning_rate": 2.339945897204689e-05, - "loss": 0.5551, - "step": 180540 - }, - { - "epoch": 1.596120864937499, - "grad_norm": 10.673317909240723, - "learning_rate": 2.339798558437502e-05, - "loss": 0.6168, - "step": 180550 - }, - { - "epoch": 1.596209268197811, - "grad_norm": 3.2672953605651855, - "learning_rate": 2.339651219670315e-05, - "loss": 0.606, - "step": 180560 - }, - { - "epoch": 1.5962976714581234, - "grad_norm": 1.986310601234436, - "learning_rate": 2.3395038809031278e-05, - "loss": 0.4415, - "step": 180570 - }, - { - "epoch": 1.5963860747184357, - "grad_norm": 1.5670112371444702, - "learning_rate": 2.3393565421359406e-05, - "loss": 0.5865, - "step": 180580 - }, - { - "epoch": 1.5964744779787479, - "grad_norm": 3.4498016834259033, - "learning_rate": 2.3392092033687538e-05, - "loss": 0.5926, - "step": 180590 - }, - { - "epoch": 1.59656288123906, - "grad_norm": 1.3564311265945435, - "learning_rate": 2.3390618646015666e-05, - "loss": 0.6366, - "step": 180600 - }, - { - "epoch": 1.5966512844993723, - "grad_norm": 20.663562774658203, - "learning_rate": 2.3389145258343794e-05, - "loss": 0.6622, - "step": 180610 - }, - { - "epoch": 1.5967396877596847, - "grad_norm": 1.8269972801208496, - "learning_rate": 2.3387671870671926e-05, - "loss": 0.5852, - "step": 180620 - }, - { - "epoch": 1.5968280910199968, - "grad_norm": 2.7022252082824707, - "learning_rate": 2.3386198483000055e-05, - "loss": 0.5378, - "step": 180630 - }, - { - "epoch": 1.596916494280309, - "grad_norm": 4.237645149230957, - "learning_rate": 2.3384725095328183e-05, - "loss": 0.5791, - "step": 180640 - }, - { - "epoch": 1.5970048975406212, - "grad_norm": 7.8528642654418945, - "learning_rate": 2.338325170765631e-05, - "loss": 0.5662, - "step": 180650 - }, - { - "epoch": 1.5970933008009336, - "grad_norm": 6.670100688934326, - "learning_rate": 2.3381778319984443e-05, - "loss": 0.5362, - "step": 180660 - }, - { - "epoch": 1.5971817040612457, - "grad_norm": 1.4665193557739258, - "learning_rate": 2.338030493231257e-05, - "loss": 0.4798, - "step": 180670 - }, - { - "epoch": 1.597270107321558, - "grad_norm": 1.8202050924301147, - "learning_rate": 2.33788315446407e-05, - "loss": 0.6075, - "step": 180680 - }, - { - "epoch": 1.5973585105818704, - "grad_norm": 1.1200059652328491, - "learning_rate": 2.3377358156968828e-05, - "loss": 0.5509, - "step": 180690 - }, - { - "epoch": 1.5974469138421825, - "grad_norm": 1.9601930379867554, - "learning_rate": 2.337588476929696e-05, - "loss": 0.601, - "step": 180700 - }, - { - "epoch": 1.5975353171024946, - "grad_norm": 2.990407943725586, - "learning_rate": 2.3374411381625088e-05, - "loss": 0.5793, - "step": 180710 - }, - { - "epoch": 1.597623720362807, - "grad_norm": 2.6189730167388916, - "learning_rate": 2.3372937993953217e-05, - "loss": 0.5415, - "step": 180720 - }, - { - "epoch": 1.5977121236231193, - "grad_norm": 4.03648567199707, - "learning_rate": 2.3371464606281348e-05, - "loss": 0.5235, - "step": 180730 - }, - { - "epoch": 1.5978005268834314, - "grad_norm": 1.577492594718933, - "learning_rate": 2.3369991218609477e-05, - "loss": 0.5668, - "step": 180740 - }, - { - "epoch": 1.5978889301437436, - "grad_norm": 4.022270679473877, - "learning_rate": 2.3368517830937605e-05, - "loss": 0.5031, - "step": 180750 - }, - { - "epoch": 1.597977333404056, - "grad_norm": 2.198739528656006, - "learning_rate": 2.3367044443265733e-05, - "loss": 0.5673, - "step": 180760 - }, - { - "epoch": 1.5980657366643682, - "grad_norm": 2.115494966506958, - "learning_rate": 2.3365571055593865e-05, - "loss": 0.7283, - "step": 180770 - }, - { - "epoch": 1.5981541399246804, - "grad_norm": 3.8738770484924316, - "learning_rate": 2.3364097667921993e-05, - "loss": 0.5925, - "step": 180780 - }, - { - "epoch": 1.5982425431849927, - "grad_norm": 3.2534127235412598, - "learning_rate": 2.3362624280250122e-05, - "loss": 0.4669, - "step": 180790 - }, - { - "epoch": 1.598330946445305, - "grad_norm": 2.4455721378326416, - "learning_rate": 2.3361150892578254e-05, - "loss": 0.759, - "step": 180800 - }, - { - "epoch": 1.5984193497056172, - "grad_norm": 1.9553617238998413, - "learning_rate": 2.3359677504906382e-05, - "loss": 0.5701, - "step": 180810 - }, - { - "epoch": 1.5985077529659293, - "grad_norm": 4.097979545593262, - "learning_rate": 2.335820411723451e-05, - "loss": 0.5689, - "step": 180820 - }, - { - "epoch": 1.5985961562262416, - "grad_norm": 6.519585609436035, - "learning_rate": 2.335673072956264e-05, - "loss": 0.5682, - "step": 180830 - }, - { - "epoch": 1.598684559486554, - "grad_norm": 2.102809190750122, - "learning_rate": 2.335525734189077e-05, - "loss": 0.5077, - "step": 180840 - }, - { - "epoch": 1.598772962746866, - "grad_norm": 3.9697105884552, - "learning_rate": 2.33537839542189e-05, - "loss": 0.614, - "step": 180850 - }, - { - "epoch": 1.5988613660071782, - "grad_norm": 5.887269020080566, - "learning_rate": 2.3352310566547027e-05, - "loss": 0.7094, - "step": 180860 - }, - { - "epoch": 1.5989497692674906, - "grad_norm": 16.581584930419922, - "learning_rate": 2.3350837178875155e-05, - "loss": 0.6375, - "step": 180870 - }, - { - "epoch": 1.599038172527803, - "grad_norm": 9.863439559936523, - "learning_rate": 2.3349363791203287e-05, - "loss": 0.6338, - "step": 180880 - }, - { - "epoch": 1.599126575788115, - "grad_norm": 2.2112786769866943, - "learning_rate": 2.3347890403531415e-05, - "loss": 0.5495, - "step": 180890 - }, - { - "epoch": 1.5992149790484274, - "grad_norm": 1.802366018295288, - "learning_rate": 2.3346417015859544e-05, - "loss": 0.4762, - "step": 180900 - }, - { - "epoch": 1.5993033823087397, - "grad_norm": 1.8756802082061768, - "learning_rate": 2.3344943628187676e-05, - "loss": 0.5056, - "step": 180910 - }, - { - "epoch": 1.5993917855690518, - "grad_norm": 5.013724327087402, - "learning_rate": 2.3343470240515804e-05, - "loss": 0.5394, - "step": 180920 - }, - { - "epoch": 1.599480188829364, - "grad_norm": 2.391293525695801, - "learning_rate": 2.3341996852843932e-05, - "loss": 0.5915, - "step": 180930 - }, - { - "epoch": 1.5995685920896763, - "grad_norm": 0.8728381395339966, - "learning_rate": 2.334052346517206e-05, - "loss": 0.5568, - "step": 180940 - }, - { - "epoch": 1.5996569953499886, - "grad_norm": 2.9804489612579346, - "learning_rate": 2.3339050077500192e-05, - "loss": 0.6745, - "step": 180950 - }, - { - "epoch": 1.5997453986103007, - "grad_norm": 4.190545082092285, - "learning_rate": 2.333757668982832e-05, - "loss": 0.7235, - "step": 180960 - }, - { - "epoch": 1.5998338018706129, - "grad_norm": 2.6208951473236084, - "learning_rate": 2.333610330215645e-05, - "loss": 0.6569, - "step": 180970 - }, - { - "epoch": 1.5999222051309252, - "grad_norm": 1.6110780239105225, - "learning_rate": 2.333462991448458e-05, - "loss": 0.6735, - "step": 180980 - }, - { - "epoch": 1.6000106083912375, - "grad_norm": 3.2944605350494385, - "learning_rate": 2.333315652681271e-05, - "loss": 0.6234, - "step": 180990 - }, - { - "epoch": 1.6000990116515497, - "grad_norm": 3.5745596885681152, - "learning_rate": 2.3331683139140838e-05, - "loss": 0.7434, - "step": 181000 - }, - { - "epoch": 1.6001874149118618, - "grad_norm": 1.958343267440796, - "learning_rate": 2.333020975146897e-05, - "loss": 0.574, - "step": 181010 - }, - { - "epoch": 1.6002758181721743, - "grad_norm": 1.6085907220840454, - "learning_rate": 2.3328736363797098e-05, - "loss": 0.6236, - "step": 181020 - }, - { - "epoch": 1.6003642214324865, - "grad_norm": 8.054312705993652, - "learning_rate": 2.3327262976125226e-05, - "loss": 0.5869, - "step": 181030 - }, - { - "epoch": 1.6004526246927986, - "grad_norm": 1.788875937461853, - "learning_rate": 2.3325789588453358e-05, - "loss": 0.5936, - "step": 181040 - }, - { - "epoch": 1.600541027953111, - "grad_norm": 16.9254093170166, - "learning_rate": 2.3324316200781486e-05, - "loss": 0.6252, - "step": 181050 - }, - { - "epoch": 1.6006294312134233, - "grad_norm": 1.7318662405014038, - "learning_rate": 2.3322842813109614e-05, - "loss": 0.5142, - "step": 181060 - }, - { - "epoch": 1.6007178344737354, - "grad_norm": 1.4358969926834106, - "learning_rate": 2.3321369425437746e-05, - "loss": 0.5436, - "step": 181070 - }, - { - "epoch": 1.6008062377340475, - "grad_norm": 2.0630404949188232, - "learning_rate": 2.3319896037765875e-05, - "loss": 0.5965, - "step": 181080 - }, - { - "epoch": 1.6008946409943599, - "grad_norm": 1.9878710508346558, - "learning_rate": 2.3318422650094003e-05, - "loss": 0.5732, - "step": 181090 - }, - { - "epoch": 1.6009830442546722, - "grad_norm": 9.042510986328125, - "learning_rate": 2.3316949262422135e-05, - "loss": 0.6844, - "step": 181100 - }, - { - "epoch": 1.6010714475149843, - "grad_norm": 2.026099920272827, - "learning_rate": 2.3315475874750263e-05, - "loss": 0.5458, - "step": 181110 - }, - { - "epoch": 1.6011598507752964, - "grad_norm": 5.122081279754639, - "learning_rate": 2.331400248707839e-05, - "loss": 0.5995, - "step": 181120 - }, - { - "epoch": 1.601248254035609, - "grad_norm": 6.760629653930664, - "learning_rate": 2.3312529099406523e-05, - "loss": 0.548, - "step": 181130 - }, - { - "epoch": 1.6013366572959211, - "grad_norm": 2.083120107650757, - "learning_rate": 2.331105571173465e-05, - "loss": 0.4942, - "step": 181140 - }, - { - "epoch": 1.6014250605562332, - "grad_norm": 2.0648934841156006, - "learning_rate": 2.330958232406278e-05, - "loss": 0.4688, - "step": 181150 - }, - { - "epoch": 1.6015134638165456, - "grad_norm": 10.854294776916504, - "learning_rate": 2.3308108936390908e-05, - "loss": 0.6666, - "step": 181160 - }, - { - "epoch": 1.601601867076858, - "grad_norm": 3.3238260746002197, - "learning_rate": 2.330663554871904e-05, - "loss": 0.5312, - "step": 181170 - }, - { - "epoch": 1.60169027033717, - "grad_norm": 1.9797765016555786, - "learning_rate": 2.3305162161047168e-05, - "loss": 0.5256, - "step": 181180 - }, - { - "epoch": 1.6017786735974822, - "grad_norm": 1.6275157928466797, - "learning_rate": 2.3303688773375297e-05, - "loss": 0.4706, - "step": 181190 - }, - { - "epoch": 1.6018670768577945, - "grad_norm": 1.9179420471191406, - "learning_rate": 2.330221538570343e-05, - "loss": 0.5285, - "step": 181200 - }, - { - "epoch": 1.6019554801181068, - "grad_norm": 4.8577880859375, - "learning_rate": 2.3300741998031557e-05, - "loss": 0.7118, - "step": 181210 - }, - { - "epoch": 1.602043883378419, - "grad_norm": 13.904053688049316, - "learning_rate": 2.3299268610359685e-05, - "loss": 0.4638, - "step": 181220 - }, - { - "epoch": 1.602132286638731, - "grad_norm": 4.712271213531494, - "learning_rate": 2.3297795222687813e-05, - "loss": 0.4728, - "step": 181230 - }, - { - "epoch": 1.6022206898990434, - "grad_norm": 4.088210105895996, - "learning_rate": 2.3296321835015945e-05, - "loss": 0.5832, - "step": 181240 - }, - { - "epoch": 1.6023090931593558, - "grad_norm": 4.0730109214782715, - "learning_rate": 2.3294848447344073e-05, - "loss": 0.6342, - "step": 181250 - }, - { - "epoch": 1.6023974964196679, - "grad_norm": 0.8740782737731934, - "learning_rate": 2.3293375059672202e-05, - "loss": 0.626, - "step": 181260 - }, - { - "epoch": 1.6024858996799802, - "grad_norm": 2.648021697998047, - "learning_rate": 2.3291901672000334e-05, - "loss": 0.6943, - "step": 181270 - }, - { - "epoch": 1.6025743029402926, - "grad_norm": 1.7694991827011108, - "learning_rate": 2.3290428284328462e-05, - "loss": 0.5814, - "step": 181280 - }, - { - "epoch": 1.6026627062006047, - "grad_norm": 5.823047637939453, - "learning_rate": 2.328895489665659e-05, - "loss": 0.5385, - "step": 181290 - }, - { - "epoch": 1.6027511094609168, - "grad_norm": 3.57016921043396, - "learning_rate": 2.328748150898472e-05, - "loss": 0.5916, - "step": 181300 - }, - { - "epoch": 1.6028395127212292, - "grad_norm": 2.9068450927734375, - "learning_rate": 2.328600812131285e-05, - "loss": 0.6711, - "step": 181310 - }, - { - "epoch": 1.6029279159815415, - "grad_norm": 1.2106068134307861, - "learning_rate": 2.328453473364098e-05, - "loss": 0.5868, - "step": 181320 - }, - { - "epoch": 1.6030163192418536, - "grad_norm": 12.682123184204102, - "learning_rate": 2.3283061345969107e-05, - "loss": 0.6893, - "step": 181330 - }, - { - "epoch": 1.6031047225021657, - "grad_norm": 3.611449956893921, - "learning_rate": 2.3281587958297235e-05, - "loss": 0.4965, - "step": 181340 - }, - { - "epoch": 1.603193125762478, - "grad_norm": 2.0053744316101074, - "learning_rate": 2.3280114570625367e-05, - "loss": 0.5402, - "step": 181350 - }, - { - "epoch": 1.6032815290227904, - "grad_norm": 2.563060998916626, - "learning_rate": 2.3278641182953496e-05, - "loss": 0.5686, - "step": 181360 - }, - { - "epoch": 1.6033699322831025, - "grad_norm": 13.727355003356934, - "learning_rate": 2.3277167795281624e-05, - "loss": 0.6338, - "step": 181370 - }, - { - "epoch": 1.6034583355434149, - "grad_norm": 2.278502941131592, - "learning_rate": 2.3275694407609756e-05, - "loss": 0.412, - "step": 181380 - }, - { - "epoch": 1.6035467388037272, - "grad_norm": 0.7340344190597534, - "learning_rate": 2.3274221019937884e-05, - "loss": 0.4774, - "step": 181390 - }, - { - "epoch": 1.6036351420640393, - "grad_norm": 16.899831771850586, - "learning_rate": 2.3272747632266012e-05, - "loss": 0.5788, - "step": 181400 - }, - { - "epoch": 1.6037235453243515, - "grad_norm": 1.7707161903381348, - "learning_rate": 2.327127424459414e-05, - "loss": 0.6372, - "step": 181410 - }, - { - "epoch": 1.6038119485846638, - "grad_norm": 2.171081066131592, - "learning_rate": 2.3269800856922272e-05, - "loss": 0.626, - "step": 181420 - }, - { - "epoch": 1.6039003518449761, - "grad_norm": 5.244316101074219, - "learning_rate": 2.32683274692504e-05, - "loss": 0.6371, - "step": 181430 - }, - { - "epoch": 1.6039887551052883, - "grad_norm": 1.4206849336624146, - "learning_rate": 2.326685408157853e-05, - "loss": 0.5649, - "step": 181440 - }, - { - "epoch": 1.6040771583656004, - "grad_norm": 0.7768887281417847, - "learning_rate": 2.326538069390666e-05, - "loss": 0.5145, - "step": 181450 - }, - { - "epoch": 1.6041655616259127, - "grad_norm": 2.4371511936187744, - "learning_rate": 2.326390730623479e-05, - "loss": 0.4989, - "step": 181460 - }, - { - "epoch": 1.604253964886225, - "grad_norm": 1.2818129062652588, - "learning_rate": 2.3262433918562918e-05, - "loss": 0.5708, - "step": 181470 - }, - { - "epoch": 1.6043423681465372, - "grad_norm": 5.305901527404785, - "learning_rate": 2.3260960530891046e-05, - "loss": 0.6164, - "step": 181480 - }, - { - "epoch": 1.6044307714068495, - "grad_norm": 4.615675449371338, - "learning_rate": 2.3259487143219178e-05, - "loss": 0.6454, - "step": 181490 - }, - { - "epoch": 1.6045191746671619, - "grad_norm": 1.7052054405212402, - "learning_rate": 2.3258013755547306e-05, - "loss": 0.497, - "step": 181500 - }, - { - "epoch": 1.604607577927474, - "grad_norm": 1.955332636833191, - "learning_rate": 2.3256540367875434e-05, - "loss": 0.6614, - "step": 181510 - }, - { - "epoch": 1.6046959811877861, - "grad_norm": 3.9738073348999023, - "learning_rate": 2.3255066980203563e-05, - "loss": 0.6741, - "step": 181520 - }, - { - "epoch": 1.6047843844480985, - "grad_norm": 5.6055121421813965, - "learning_rate": 2.3253593592531695e-05, - "loss": 0.5592, - "step": 181530 - }, - { - "epoch": 1.6048727877084108, - "grad_norm": 5.648557186126709, - "learning_rate": 2.3252120204859823e-05, - "loss": 0.6688, - "step": 181540 - }, - { - "epoch": 1.604961190968723, - "grad_norm": 3.946701765060425, - "learning_rate": 2.325064681718795e-05, - "loss": 0.5887, - "step": 181550 - }, - { - "epoch": 1.605049594229035, - "grad_norm": 1.046492338180542, - "learning_rate": 2.3249173429516083e-05, - "loss": 0.5832, - "step": 181560 - }, - { - "epoch": 1.6051379974893474, - "grad_norm": 9.238065719604492, - "learning_rate": 2.324770004184421e-05, - "loss": 0.5267, - "step": 181570 - }, - { - "epoch": 1.6052264007496597, - "grad_norm": 12.647065162658691, - "learning_rate": 2.324622665417234e-05, - "loss": 0.7567, - "step": 181580 - }, - { - "epoch": 1.6053148040099718, - "grad_norm": 3.0393753051757812, - "learning_rate": 2.3244753266500468e-05, - "loss": 0.6261, - "step": 181590 - }, - { - "epoch": 1.6054032072702842, - "grad_norm": 2.342805862426758, - "learning_rate": 2.32432798788286e-05, - "loss": 0.4822, - "step": 181600 - }, - { - "epoch": 1.6054916105305965, - "grad_norm": 4.9356279373168945, - "learning_rate": 2.3241806491156728e-05, - "loss": 0.5146, - "step": 181610 - }, - { - "epoch": 1.6055800137909086, - "grad_norm": 1.2645158767700195, - "learning_rate": 2.3240333103484856e-05, - "loss": 0.766, - "step": 181620 - }, - { - "epoch": 1.6056684170512208, - "grad_norm": 1.230264663696289, - "learning_rate": 2.3238859715812985e-05, - "loss": 0.6112, - "step": 181630 - }, - { - "epoch": 1.605756820311533, - "grad_norm": 1.6895595788955688, - "learning_rate": 2.3237386328141117e-05, - "loss": 0.5351, - "step": 181640 - }, - { - "epoch": 1.6058452235718454, - "grad_norm": 10.520718574523926, - "learning_rate": 2.3235912940469245e-05, - "loss": 0.6176, - "step": 181650 - }, - { - "epoch": 1.6059336268321576, - "grad_norm": 5.648880958557129, - "learning_rate": 2.3234439552797373e-05, - "loss": 0.7447, - "step": 181660 - }, - { - "epoch": 1.6060220300924697, - "grad_norm": 0.617428719997406, - "learning_rate": 2.3232966165125505e-05, - "loss": 0.6657, - "step": 181670 - }, - { - "epoch": 1.606110433352782, - "grad_norm": 7.144290924072266, - "learning_rate": 2.3231492777453633e-05, - "loss": 0.5915, - "step": 181680 - }, - { - "epoch": 1.6061988366130944, - "grad_norm": 2.25575590133667, - "learning_rate": 2.3230019389781762e-05, - "loss": 0.5199, - "step": 181690 - }, - { - "epoch": 1.6062872398734065, - "grad_norm": 2.3102056980133057, - "learning_rate": 2.322854600210989e-05, - "loss": 0.6036, - "step": 181700 - }, - { - "epoch": 1.6063756431337186, - "grad_norm": 1.210795521736145, - "learning_rate": 2.3227072614438022e-05, - "loss": 0.6067, - "step": 181710 - }, - { - "epoch": 1.6064640463940312, - "grad_norm": 1.6011179685592651, - "learning_rate": 2.322559922676615e-05, - "loss": 0.4687, - "step": 181720 - }, - { - "epoch": 1.6065524496543433, - "grad_norm": 4.61318826675415, - "learning_rate": 2.322412583909428e-05, - "loss": 0.7097, - "step": 181730 - }, - { - "epoch": 1.6066408529146554, - "grad_norm": 1.1694315671920776, - "learning_rate": 2.322265245142241e-05, - "loss": 0.5813, - "step": 181740 - }, - { - "epoch": 1.6067292561749678, - "grad_norm": 5.971678733825684, - "learning_rate": 2.322117906375054e-05, - "loss": 0.4506, - "step": 181750 - }, - { - "epoch": 1.60681765943528, - "grad_norm": 4.504195213317871, - "learning_rate": 2.3219705676078667e-05, - "loss": 0.5465, - "step": 181760 - }, - { - "epoch": 1.6069060626955922, - "grad_norm": 1.9132769107818604, - "learning_rate": 2.3218232288406795e-05, - "loss": 0.5942, - "step": 181770 - }, - { - "epoch": 1.6069944659559043, - "grad_norm": 7.634122848510742, - "learning_rate": 2.3216758900734927e-05, - "loss": 0.5525, - "step": 181780 - }, - { - "epoch": 1.6070828692162167, - "grad_norm": 9.518406867980957, - "learning_rate": 2.3215285513063055e-05, - "loss": 0.5315, - "step": 181790 - }, - { - "epoch": 1.607171272476529, - "grad_norm": 7.976248741149902, - "learning_rate": 2.3213812125391184e-05, - "loss": 0.6382, - "step": 181800 - }, - { - "epoch": 1.6072596757368411, - "grad_norm": 2.548813819885254, - "learning_rate": 2.3212338737719312e-05, - "loss": 0.5214, - "step": 181810 - }, - { - "epoch": 1.6073480789971533, - "grad_norm": 1.5473272800445557, - "learning_rate": 2.3210865350047444e-05, - "loss": 0.5252, - "step": 181820 - }, - { - "epoch": 1.6074364822574656, - "grad_norm": 2.76377010345459, - "learning_rate": 2.3209391962375572e-05, - "loss": 0.4888, - "step": 181830 - }, - { - "epoch": 1.607524885517778, - "grad_norm": 2.4098384380340576, - "learning_rate": 2.32079185747037e-05, - "loss": 0.5933, - "step": 181840 - }, - { - "epoch": 1.60761328877809, - "grad_norm": 3.50019907951355, - "learning_rate": 2.3206445187031832e-05, - "loss": 0.6098, - "step": 181850 - }, - { - "epoch": 1.6077016920384024, - "grad_norm": 1.6021910905838013, - "learning_rate": 2.320497179935996e-05, - "loss": 0.4224, - "step": 181860 - }, - { - "epoch": 1.6077900952987147, - "grad_norm": 3.7278025150299072, - "learning_rate": 2.320349841168809e-05, - "loss": 0.6151, - "step": 181870 - }, - { - "epoch": 1.6078784985590269, - "grad_norm": 1.9527655839920044, - "learning_rate": 2.3202025024016217e-05, - "loss": 0.571, - "step": 181880 - }, - { - "epoch": 1.607966901819339, - "grad_norm": 2.862412452697754, - "learning_rate": 2.320055163634435e-05, - "loss": 0.5115, - "step": 181890 - }, - { - "epoch": 1.6080553050796513, - "grad_norm": 2.3258020877838135, - "learning_rate": 2.3199078248672477e-05, - "loss": 0.6083, - "step": 181900 - }, - { - "epoch": 1.6081437083399637, - "grad_norm": 2.944877862930298, - "learning_rate": 2.3197604861000606e-05, - "loss": 0.5423, - "step": 181910 - }, - { - "epoch": 1.6082321116002758, - "grad_norm": 1.8457995653152466, - "learning_rate": 2.3196131473328738e-05, - "loss": 0.4872, - "step": 181920 - }, - { - "epoch": 1.608320514860588, - "grad_norm": 3.075913190841675, - "learning_rate": 2.3194658085656866e-05, - "loss": 0.6344, - "step": 181930 - }, - { - "epoch": 1.6084089181209003, - "grad_norm": 3.6242711544036865, - "learning_rate": 2.3193184697984994e-05, - "loss": 0.5162, - "step": 181940 - }, - { - "epoch": 1.6084973213812126, - "grad_norm": 2.1001744270324707, - "learning_rate": 2.3191711310313126e-05, - "loss": 0.4915, - "step": 181950 - }, - { - "epoch": 1.6085857246415247, - "grad_norm": 2.706205368041992, - "learning_rate": 2.3190237922641254e-05, - "loss": 0.7342, - "step": 181960 - }, - { - "epoch": 1.608674127901837, - "grad_norm": 0.8581461906433105, - "learning_rate": 2.3188764534969383e-05, - "loss": 0.5639, - "step": 181970 - }, - { - "epoch": 1.6087625311621494, - "grad_norm": 3.009218215942383, - "learning_rate": 2.3187291147297514e-05, - "loss": 0.5698, - "step": 181980 - }, - { - "epoch": 1.6088509344224615, - "grad_norm": 1.1869412660598755, - "learning_rate": 2.3185817759625643e-05, - "loss": 0.6848, - "step": 181990 - }, - { - "epoch": 1.6089393376827736, - "grad_norm": 2.014723539352417, - "learning_rate": 2.318434437195377e-05, - "loss": 0.6098, - "step": 182000 - }, - { - "epoch": 1.609027740943086, - "grad_norm": 8.267091751098633, - "learning_rate": 2.3182870984281903e-05, - "loss": 0.546, - "step": 182010 - }, - { - "epoch": 1.6091161442033983, - "grad_norm": 2.47029972076416, - "learning_rate": 2.318139759661003e-05, - "loss": 0.6097, - "step": 182020 - }, - { - "epoch": 1.6092045474637104, - "grad_norm": 1.279463768005371, - "learning_rate": 2.317992420893816e-05, - "loss": 0.5847, - "step": 182030 - }, - { - "epoch": 1.6092929507240226, - "grad_norm": 4.717491149902344, - "learning_rate": 2.317845082126629e-05, - "loss": 0.5985, - "step": 182040 - }, - { - "epoch": 1.609381353984335, - "grad_norm": 17.589853286743164, - "learning_rate": 2.317697743359442e-05, - "loss": 0.5873, - "step": 182050 - }, - { - "epoch": 1.6094697572446472, - "grad_norm": 3.970960855484009, - "learning_rate": 2.3175504045922548e-05, - "loss": 0.6575, - "step": 182060 - }, - { - "epoch": 1.6095581605049594, - "grad_norm": 2.1887660026550293, - "learning_rate": 2.317403065825068e-05, - "loss": 0.5719, - "step": 182070 - }, - { - "epoch": 1.6096465637652717, - "grad_norm": 6.2741007804870605, - "learning_rate": 2.3172557270578808e-05, - "loss": 0.6173, - "step": 182080 - }, - { - "epoch": 1.609734967025584, - "grad_norm": 4.65067195892334, - "learning_rate": 2.3171083882906937e-05, - "loss": 0.5542, - "step": 182090 - }, - { - "epoch": 1.6098233702858962, - "grad_norm": 3.2295310497283936, - "learning_rate": 2.3169610495235065e-05, - "loss": 0.5144, - "step": 182100 - }, - { - "epoch": 1.6099117735462083, - "grad_norm": 3.585876226425171, - "learning_rate": 2.3168137107563197e-05, - "loss": 0.6248, - "step": 182110 - }, - { - "epoch": 1.6100001768065206, - "grad_norm": 1.9754563570022583, - "learning_rate": 2.3166663719891325e-05, - "loss": 0.6352, - "step": 182120 - }, - { - "epoch": 1.610088580066833, - "grad_norm": 1.6661945581436157, - "learning_rate": 2.3165190332219453e-05, - "loss": 0.4967, - "step": 182130 - }, - { - "epoch": 1.610176983327145, - "grad_norm": 0.993270993232727, - "learning_rate": 2.3163716944547585e-05, - "loss": 0.5103, - "step": 182140 - }, - { - "epoch": 1.6102653865874572, - "grad_norm": 11.330484390258789, - "learning_rate": 2.3162243556875713e-05, - "loss": 0.5816, - "step": 182150 - }, - { - "epoch": 1.6103537898477696, - "grad_norm": 3.327075958251953, - "learning_rate": 2.3160770169203842e-05, - "loss": 0.5867, - "step": 182160 - }, - { - "epoch": 1.610442193108082, - "grad_norm": 4.12709903717041, - "learning_rate": 2.315929678153197e-05, - "loss": 0.5012, - "step": 182170 - }, - { - "epoch": 1.610530596368394, - "grad_norm": 2.376232624053955, - "learning_rate": 2.3157823393860102e-05, - "loss": 0.5629, - "step": 182180 - }, - { - "epoch": 1.6106189996287064, - "grad_norm": 1.1024261713027954, - "learning_rate": 2.315635000618823e-05, - "loss": 0.5488, - "step": 182190 - }, - { - "epoch": 1.6107074028890187, - "grad_norm": 5.935842514038086, - "learning_rate": 2.315487661851636e-05, - "loss": 0.5842, - "step": 182200 - }, - { - "epoch": 1.6107958061493308, - "grad_norm": 8.251925468444824, - "learning_rate": 2.315340323084449e-05, - "loss": 0.5993, - "step": 182210 - }, - { - "epoch": 1.610884209409643, - "grad_norm": 1.3976600170135498, - "learning_rate": 2.315192984317262e-05, - "loss": 0.6335, - "step": 182220 - }, - { - "epoch": 1.6109726126699553, - "grad_norm": 10.666242599487305, - "learning_rate": 2.3150456455500747e-05, - "loss": 0.6517, - "step": 182230 - }, - { - "epoch": 1.6110610159302676, - "grad_norm": 4.050312519073486, - "learning_rate": 2.3148983067828875e-05, - "loss": 0.595, - "step": 182240 - }, - { - "epoch": 1.6111494191905797, - "grad_norm": 5.496546268463135, - "learning_rate": 2.3147509680157007e-05, - "loss": 0.5611, - "step": 182250 - }, - { - "epoch": 1.6112378224508919, - "grad_norm": 1.9264631271362305, - "learning_rate": 2.3146036292485135e-05, - "loss": 0.5173, - "step": 182260 - }, - { - "epoch": 1.6113262257112042, - "grad_norm": 2.7983193397521973, - "learning_rate": 2.3144562904813264e-05, - "loss": 0.6491, - "step": 182270 - }, - { - "epoch": 1.6114146289715165, - "grad_norm": 5.818872451782227, - "learning_rate": 2.3143089517141392e-05, - "loss": 0.5475, - "step": 182280 - }, - { - "epoch": 1.6115030322318287, - "grad_norm": 1.6825203895568848, - "learning_rate": 2.3141616129469524e-05, - "loss": 0.6427, - "step": 182290 - }, - { - "epoch": 1.6115914354921408, - "grad_norm": 1.9343751668930054, - "learning_rate": 2.3140142741797652e-05, - "loss": 0.5137, - "step": 182300 - }, - { - "epoch": 1.6116798387524534, - "grad_norm": 13.85970401763916, - "learning_rate": 2.313866935412578e-05, - "loss": 0.6143, - "step": 182310 - }, - { - "epoch": 1.6117682420127655, - "grad_norm": 2.501319646835327, - "learning_rate": 2.3137195966453912e-05, - "loss": 0.5665, - "step": 182320 - }, - { - "epoch": 1.6118566452730776, - "grad_norm": 2.020782709121704, - "learning_rate": 2.313572257878204e-05, - "loss": 0.5982, - "step": 182330 - }, - { - "epoch": 1.61194504853339, - "grad_norm": 3.299557685852051, - "learning_rate": 2.313424919111017e-05, - "loss": 0.5345, - "step": 182340 - }, - { - "epoch": 1.6120334517937023, - "grad_norm": 2.0023138523101807, - "learning_rate": 2.3132775803438297e-05, - "loss": 0.6338, - "step": 182350 - }, - { - "epoch": 1.6121218550540144, - "grad_norm": 1.1032688617706299, - "learning_rate": 2.313130241576643e-05, - "loss": 0.7372, - "step": 182360 - }, - { - "epoch": 1.6122102583143265, - "grad_norm": 1.0570935010910034, - "learning_rate": 2.3129829028094558e-05, - "loss": 0.4742, - "step": 182370 - }, - { - "epoch": 1.6122986615746389, - "grad_norm": 4.266687393188477, - "learning_rate": 2.3128355640422686e-05, - "loss": 0.6086, - "step": 182380 - }, - { - "epoch": 1.6123870648349512, - "grad_norm": 4.972256183624268, - "learning_rate": 2.3126882252750818e-05, - "loss": 0.7079, - "step": 182390 - }, - { - "epoch": 1.6124754680952633, - "grad_norm": 1.728886604309082, - "learning_rate": 2.3125408865078946e-05, - "loss": 0.5929, - "step": 182400 - }, - { - "epoch": 1.6125638713555754, - "grad_norm": 1.6776295900344849, - "learning_rate": 2.3123935477407074e-05, - "loss": 0.6809, - "step": 182410 - }, - { - "epoch": 1.6126522746158878, - "grad_norm": 7.576881408691406, - "learning_rate": 2.3122462089735203e-05, - "loss": 0.6855, - "step": 182420 - }, - { - "epoch": 1.6127406778762001, - "grad_norm": 16.358537673950195, - "learning_rate": 2.3120988702063334e-05, - "loss": 0.6711, - "step": 182430 - }, - { - "epoch": 1.6128290811365122, - "grad_norm": 1.77681303024292, - "learning_rate": 2.3119515314391463e-05, - "loss": 0.5875, - "step": 182440 - }, - { - "epoch": 1.6129174843968246, - "grad_norm": 1.8097878694534302, - "learning_rate": 2.311804192671959e-05, - "loss": 0.5834, - "step": 182450 - }, - { - "epoch": 1.613005887657137, - "grad_norm": 4.5516180992126465, - "learning_rate": 2.311656853904772e-05, - "loss": 0.6121, - "step": 182460 - }, - { - "epoch": 1.613094290917449, - "grad_norm": 8.049764633178711, - "learning_rate": 2.311509515137585e-05, - "loss": 0.6117, - "step": 182470 - }, - { - "epoch": 1.6131826941777612, - "grad_norm": 3.6471707820892334, - "learning_rate": 2.311362176370398e-05, - "loss": 0.6182, - "step": 182480 - }, - { - "epoch": 1.6132710974380735, - "grad_norm": 1.2097395658493042, - "learning_rate": 2.3112148376032108e-05, - "loss": 0.6337, - "step": 182490 - }, - { - "epoch": 1.6133595006983859, - "grad_norm": 1.8433586359024048, - "learning_rate": 2.311067498836024e-05, - "loss": 0.6182, - "step": 182500 - }, - { - "epoch": 1.613447903958698, - "grad_norm": 1.2184592485427856, - "learning_rate": 2.3109201600688368e-05, - "loss": 0.7434, - "step": 182510 - }, - { - "epoch": 1.61353630721901, - "grad_norm": 9.369141578674316, - "learning_rate": 2.3107728213016496e-05, - "loss": 0.6229, - "step": 182520 - }, - { - "epoch": 1.6136247104793224, - "grad_norm": 13.255956649780273, - "learning_rate": 2.3106254825344625e-05, - "loss": 0.6211, - "step": 182530 - }, - { - "epoch": 1.6137131137396348, - "grad_norm": 10.364533424377441, - "learning_rate": 2.3104781437672757e-05, - "loss": 0.7024, - "step": 182540 - }, - { - "epoch": 1.613801516999947, - "grad_norm": 3.991870880126953, - "learning_rate": 2.3103308050000885e-05, - "loss": 0.6564, - "step": 182550 - }, - { - "epoch": 1.6138899202602592, - "grad_norm": 9.33375358581543, - "learning_rate": 2.3101834662329013e-05, - "loss": 0.6453, - "step": 182560 - }, - { - "epoch": 1.6139783235205716, - "grad_norm": 1.5691694021224976, - "learning_rate": 2.3100361274657145e-05, - "loss": 0.4641, - "step": 182570 - }, - { - "epoch": 1.6140667267808837, - "grad_norm": 2.3496265411376953, - "learning_rate": 2.3098887886985273e-05, - "loss": 0.5926, - "step": 182580 - }, - { - "epoch": 1.6141551300411958, - "grad_norm": 6.989462375640869, - "learning_rate": 2.30974144993134e-05, - "loss": 0.54, - "step": 182590 - }, - { - "epoch": 1.6142435333015082, - "grad_norm": 1.6402604579925537, - "learning_rate": 2.309594111164153e-05, - "loss": 0.4626, - "step": 182600 - }, - { - "epoch": 1.6143319365618205, - "grad_norm": 2.9409546852111816, - "learning_rate": 2.3094467723969662e-05, - "loss": 0.5875, - "step": 182610 - }, - { - "epoch": 1.6144203398221326, - "grad_norm": 10.978370666503906, - "learning_rate": 2.309299433629779e-05, - "loss": 0.604, - "step": 182620 - }, - { - "epoch": 1.6145087430824447, - "grad_norm": 5.703006744384766, - "learning_rate": 2.309152094862592e-05, - "loss": 0.5203, - "step": 182630 - }, - { - "epoch": 1.614597146342757, - "grad_norm": 3.4042088985443115, - "learning_rate": 2.3090047560954047e-05, - "loss": 0.7101, - "step": 182640 - }, - { - "epoch": 1.6146855496030694, - "grad_norm": 4.665914535522461, - "learning_rate": 2.308857417328218e-05, - "loss": 0.5858, - "step": 182650 - }, - { - "epoch": 1.6147739528633815, - "grad_norm": 2.900662660598755, - "learning_rate": 2.3087100785610307e-05, - "loss": 0.5946, - "step": 182660 - }, - { - "epoch": 1.6148623561236939, - "grad_norm": 5.946843147277832, - "learning_rate": 2.3085627397938435e-05, - "loss": 0.563, - "step": 182670 - }, - { - "epoch": 1.6149507593840062, - "grad_norm": 13.57431697845459, - "learning_rate": 2.3084154010266567e-05, - "loss": 0.6278, - "step": 182680 - }, - { - "epoch": 1.6150391626443183, - "grad_norm": 1.0160492658615112, - "learning_rate": 2.3082680622594695e-05, - "loss": 0.5475, - "step": 182690 - }, - { - "epoch": 1.6151275659046305, - "grad_norm": 1.1510932445526123, - "learning_rate": 2.3081207234922824e-05, - "loss": 0.5453, - "step": 182700 - }, - { - "epoch": 1.6152159691649428, - "grad_norm": 2.0452396869659424, - "learning_rate": 2.3079733847250952e-05, - "loss": 0.6262, - "step": 182710 - }, - { - "epoch": 1.6153043724252552, - "grad_norm": 1.8475239276885986, - "learning_rate": 2.3078260459579084e-05, - "loss": 0.5877, - "step": 182720 - }, - { - "epoch": 1.6153927756855673, - "grad_norm": 2.4507598876953125, - "learning_rate": 2.3076787071907212e-05, - "loss": 0.6673, - "step": 182730 - }, - { - "epoch": 1.6154811789458794, - "grad_norm": 8.006498336791992, - "learning_rate": 2.307531368423534e-05, - "loss": 0.5106, - "step": 182740 - }, - { - "epoch": 1.6155695822061917, - "grad_norm": 2.00193190574646, - "learning_rate": 2.307384029656347e-05, - "loss": 0.6629, - "step": 182750 - }, - { - "epoch": 1.615657985466504, - "grad_norm": 1.2723567485809326, - "learning_rate": 2.30723669088916e-05, - "loss": 0.6077, - "step": 182760 - }, - { - "epoch": 1.6157463887268162, - "grad_norm": 11.683198928833008, - "learning_rate": 2.307089352121973e-05, - "loss": 0.6445, - "step": 182770 - }, - { - "epoch": 1.6158347919871285, - "grad_norm": 2.3526580333709717, - "learning_rate": 2.3069420133547857e-05, - "loss": 0.5111, - "step": 182780 - }, - { - "epoch": 1.6159231952474409, - "grad_norm": 3.677626848220825, - "learning_rate": 2.306794674587599e-05, - "loss": 0.6075, - "step": 182790 - }, - { - "epoch": 1.616011598507753, - "grad_norm": 1.0365103483200073, - "learning_rate": 2.3066473358204117e-05, - "loss": 0.4695, - "step": 182800 - }, - { - "epoch": 1.6161000017680651, - "grad_norm": 1.7588698863983154, - "learning_rate": 2.3064999970532246e-05, - "loss": 0.5403, - "step": 182810 - }, - { - "epoch": 1.6161884050283775, - "grad_norm": 1.7166516780853271, - "learning_rate": 2.3063526582860374e-05, - "loss": 0.5617, - "step": 182820 - }, - { - "epoch": 1.6162768082886898, - "grad_norm": 4.186545372009277, - "learning_rate": 2.3062053195188506e-05, - "loss": 0.586, - "step": 182830 - }, - { - "epoch": 1.616365211549002, - "grad_norm": 13.704556465148926, - "learning_rate": 2.3060579807516634e-05, - "loss": 0.6511, - "step": 182840 - }, - { - "epoch": 1.616453614809314, - "grad_norm": 13.228602409362793, - "learning_rate": 2.3059106419844763e-05, - "loss": 0.5323, - "step": 182850 - }, - { - "epoch": 1.6165420180696264, - "grad_norm": 5.97099494934082, - "learning_rate": 2.3057633032172894e-05, - "loss": 0.597, - "step": 182860 - }, - { - "epoch": 1.6166304213299387, - "grad_norm": 1.8951526880264282, - "learning_rate": 2.3056159644501023e-05, - "loss": 0.5811, - "step": 182870 - }, - { - "epoch": 1.6167188245902508, - "grad_norm": 3.4787559509277344, - "learning_rate": 2.3054686256829154e-05, - "loss": 0.5543, - "step": 182880 - }, - { - "epoch": 1.616807227850563, - "grad_norm": 3.7139663696289062, - "learning_rate": 2.3053212869157283e-05, - "loss": 0.6589, - "step": 182890 - }, - { - "epoch": 1.6168956311108755, - "grad_norm": 5.257376670837402, - "learning_rate": 2.305173948148541e-05, - "loss": 0.6923, - "step": 182900 - }, - { - "epoch": 1.6169840343711877, - "grad_norm": 3.8011438846588135, - "learning_rate": 2.3050266093813543e-05, - "loss": 0.7006, - "step": 182910 - }, - { - "epoch": 1.6170724376314998, - "grad_norm": 2.4830210208892822, - "learning_rate": 2.304879270614167e-05, - "loss": 0.6573, - "step": 182920 - }, - { - "epoch": 1.6171608408918121, - "grad_norm": 0.9808798432350159, - "learning_rate": 2.30473193184698e-05, - "loss": 0.5592, - "step": 182930 - }, - { - "epoch": 1.6172492441521245, - "grad_norm": 1.3411725759506226, - "learning_rate": 2.304584593079793e-05, - "loss": 0.5643, - "step": 182940 - }, - { - "epoch": 1.6173376474124366, - "grad_norm": 1.004717469215393, - "learning_rate": 2.304437254312606e-05, - "loss": 0.5945, - "step": 182950 - }, - { - "epoch": 1.6174260506727487, - "grad_norm": 3.487877130508423, - "learning_rate": 2.3042899155454188e-05, - "loss": 0.6477, - "step": 182960 - }, - { - "epoch": 1.617514453933061, - "grad_norm": 5.3697638511657715, - "learning_rate": 2.304142576778232e-05, - "loss": 0.4594, - "step": 182970 - }, - { - "epoch": 1.6176028571933734, - "grad_norm": 2.046640396118164, - "learning_rate": 2.3039952380110448e-05, - "loss": 0.6383, - "step": 182980 - }, - { - "epoch": 1.6176912604536855, - "grad_norm": 9.25212574005127, - "learning_rate": 2.3038478992438576e-05, - "loss": 0.6077, - "step": 182990 - }, - { - "epoch": 1.6177796637139976, - "grad_norm": 2.4157204627990723, - "learning_rate": 2.3037005604766705e-05, - "loss": 0.6537, - "step": 183000 - }, - { - "epoch": 1.61786806697431, - "grad_norm": 5.2383713722229, - "learning_rate": 2.3035532217094837e-05, - "loss": 0.5277, - "step": 183010 - }, - { - "epoch": 1.6179564702346223, - "grad_norm": 5.388779163360596, - "learning_rate": 2.3034058829422965e-05, - "loss": 0.581, - "step": 183020 - }, - { - "epoch": 1.6180448734949344, - "grad_norm": 3.4251158237457275, - "learning_rate": 2.3032585441751093e-05, - "loss": 0.6993, - "step": 183030 - }, - { - "epoch": 1.6181332767552468, - "grad_norm": 0.8551393151283264, - "learning_rate": 2.3031112054079225e-05, - "loss": 0.6008, - "step": 183040 - }, - { - "epoch": 1.618221680015559, - "grad_norm": 1.4848769903182983, - "learning_rate": 2.3029638666407353e-05, - "loss": 0.5718, - "step": 183050 - }, - { - "epoch": 1.6183100832758712, - "grad_norm": 4.130042552947998, - "learning_rate": 2.3028165278735482e-05, - "loss": 0.5397, - "step": 183060 - }, - { - "epoch": 1.6183984865361833, - "grad_norm": 13.945141792297363, - "learning_rate": 2.302669189106361e-05, - "loss": 0.6131, - "step": 183070 - }, - { - "epoch": 1.6184868897964957, - "grad_norm": 1.6707444190979004, - "learning_rate": 2.3025218503391742e-05, - "loss": 0.439, - "step": 183080 - }, - { - "epoch": 1.618575293056808, - "grad_norm": 2.354923725128174, - "learning_rate": 2.302374511571987e-05, - "loss": 0.5689, - "step": 183090 - }, - { - "epoch": 1.6186636963171201, - "grad_norm": 1.6011011600494385, - "learning_rate": 2.3022271728048e-05, - "loss": 0.5743, - "step": 183100 - }, - { - "epoch": 1.6187520995774323, - "grad_norm": 1.0173894166946411, - "learning_rate": 2.3020798340376127e-05, - "loss": 0.5217, - "step": 183110 - }, - { - "epoch": 1.6188405028377446, - "grad_norm": 1.8395146131515503, - "learning_rate": 2.301932495270426e-05, - "loss": 0.5786, - "step": 183120 - }, - { - "epoch": 1.618928906098057, - "grad_norm": 4.783468246459961, - "learning_rate": 2.3017851565032387e-05, - "loss": 0.53, - "step": 183130 - }, - { - "epoch": 1.619017309358369, - "grad_norm": 5.537186145782471, - "learning_rate": 2.3016378177360515e-05, - "loss": 0.6554, - "step": 183140 - }, - { - "epoch": 1.6191057126186814, - "grad_norm": 2.122586727142334, - "learning_rate": 2.3014904789688647e-05, - "loss": 0.6046, - "step": 183150 - }, - { - "epoch": 1.6191941158789938, - "grad_norm": 7.516603469848633, - "learning_rate": 2.3013431402016775e-05, - "loss": 0.5209, - "step": 183160 - }, - { - "epoch": 1.6192825191393059, - "grad_norm": 2.1614632606506348, - "learning_rate": 2.3011958014344904e-05, - "loss": 0.5478, - "step": 183170 - }, - { - "epoch": 1.619370922399618, - "grad_norm": 2.4288723468780518, - "learning_rate": 2.3010484626673032e-05, - "loss": 0.6448, - "step": 183180 - }, - { - "epoch": 1.6194593256599303, - "grad_norm": 3.185554027557373, - "learning_rate": 2.3009011239001164e-05, - "loss": 0.4708, - "step": 183190 - }, - { - "epoch": 1.6195477289202427, - "grad_norm": 1.534436583518982, - "learning_rate": 2.3007537851329292e-05, - "loss": 0.5635, - "step": 183200 - }, - { - "epoch": 1.6196361321805548, - "grad_norm": 11.226266860961914, - "learning_rate": 2.300606446365742e-05, - "loss": 0.5708, - "step": 183210 - }, - { - "epoch": 1.619724535440867, - "grad_norm": 2.137450933456421, - "learning_rate": 2.300459107598555e-05, - "loss": 0.7895, - "step": 183220 - }, - { - "epoch": 1.6198129387011793, - "grad_norm": 4.291244029998779, - "learning_rate": 2.300311768831368e-05, - "loss": 0.6136, - "step": 183230 - }, - { - "epoch": 1.6199013419614916, - "grad_norm": 1.8652474880218506, - "learning_rate": 2.300164430064181e-05, - "loss": 0.6109, - "step": 183240 - }, - { - "epoch": 1.6199897452218037, - "grad_norm": 2.3695945739746094, - "learning_rate": 2.3000170912969937e-05, - "loss": 0.4908, - "step": 183250 - }, - { - "epoch": 1.620078148482116, - "grad_norm": 4.933897972106934, - "learning_rate": 2.299869752529807e-05, - "loss": 0.6101, - "step": 183260 - }, - { - "epoch": 1.6201665517424284, - "grad_norm": 1.580335259437561, - "learning_rate": 2.2997224137626197e-05, - "loss": 0.5333, - "step": 183270 - }, - { - "epoch": 1.6202549550027405, - "grad_norm": 2.3352818489074707, - "learning_rate": 2.2995750749954326e-05, - "loss": 0.6282, - "step": 183280 - }, - { - "epoch": 1.6203433582630526, - "grad_norm": 7.141417026519775, - "learning_rate": 2.2994277362282454e-05, - "loss": 0.7438, - "step": 183290 - }, - { - "epoch": 1.620431761523365, - "grad_norm": 3.556039810180664, - "learning_rate": 2.2992803974610586e-05, - "loss": 0.5468, - "step": 183300 - }, - { - "epoch": 1.6205201647836773, - "grad_norm": 4.072267055511475, - "learning_rate": 2.2991330586938714e-05, - "loss": 0.6712, - "step": 183310 - }, - { - "epoch": 1.6206085680439895, - "grad_norm": 2.012181282043457, - "learning_rate": 2.2989857199266843e-05, - "loss": 0.624, - "step": 183320 - }, - { - "epoch": 1.6206969713043016, - "grad_norm": 1.802600622177124, - "learning_rate": 2.2988383811594974e-05, - "loss": 0.5486, - "step": 183330 - }, - { - "epoch": 1.620785374564614, - "grad_norm": 9.85913372039795, - "learning_rate": 2.2986910423923103e-05, - "loss": 0.646, - "step": 183340 - }, - { - "epoch": 1.6208737778249263, - "grad_norm": 1.4825754165649414, - "learning_rate": 2.298543703625123e-05, - "loss": 0.6334, - "step": 183350 - }, - { - "epoch": 1.6209621810852384, - "grad_norm": 1.2595270872116089, - "learning_rate": 2.298396364857936e-05, - "loss": 0.6473, - "step": 183360 - }, - { - "epoch": 1.6210505843455507, - "grad_norm": 1.9350718259811401, - "learning_rate": 2.298249026090749e-05, - "loss": 0.6663, - "step": 183370 - }, - { - "epoch": 1.621138987605863, - "grad_norm": 2.0129005908966064, - "learning_rate": 2.298101687323562e-05, - "loss": 0.5177, - "step": 183380 - }, - { - "epoch": 1.6212273908661752, - "grad_norm": 2.1219167709350586, - "learning_rate": 2.2979543485563748e-05, - "loss": 0.4817, - "step": 183390 - }, - { - "epoch": 1.6213157941264873, - "grad_norm": 1.235468864440918, - "learning_rate": 2.2978070097891876e-05, - "loss": 0.5827, - "step": 183400 - }, - { - "epoch": 1.6214041973867996, - "grad_norm": 4.386280536651611, - "learning_rate": 2.2976596710220008e-05, - "loss": 0.5182, - "step": 183410 - }, - { - "epoch": 1.621492600647112, - "grad_norm": 1.6221117973327637, - "learning_rate": 2.2975123322548136e-05, - "loss": 0.5621, - "step": 183420 - }, - { - "epoch": 1.621581003907424, - "grad_norm": 1.7423888444900513, - "learning_rate": 2.2973649934876265e-05, - "loss": 0.5202, - "step": 183430 - }, - { - "epoch": 1.6216694071677362, - "grad_norm": 3.144033908843994, - "learning_rate": 2.2972176547204396e-05, - "loss": 0.6239, - "step": 183440 - }, - { - "epoch": 1.6217578104280486, - "grad_norm": 3.2087838649749756, - "learning_rate": 2.2970703159532525e-05, - "loss": 0.6817, - "step": 183450 - }, - { - "epoch": 1.621846213688361, - "grad_norm": 2.837109088897705, - "learning_rate": 2.2969229771860653e-05, - "loss": 0.6833, - "step": 183460 - }, - { - "epoch": 1.621934616948673, - "grad_norm": 1.2741280794143677, - "learning_rate": 2.296775638418878e-05, - "loss": 0.5932, - "step": 183470 - }, - { - "epoch": 1.6220230202089851, - "grad_norm": 3.0609824657440186, - "learning_rate": 2.2966282996516913e-05, - "loss": 0.5633, - "step": 183480 - }, - { - "epoch": 1.6221114234692977, - "grad_norm": 9.062402725219727, - "learning_rate": 2.296480960884504e-05, - "loss": 0.5761, - "step": 183490 - }, - { - "epoch": 1.6221998267296098, - "grad_norm": 1.9718917608261108, - "learning_rate": 2.296333622117317e-05, - "loss": 0.6681, - "step": 183500 - }, - { - "epoch": 1.622288229989922, - "grad_norm": 6.590805530548096, - "learning_rate": 2.2961862833501302e-05, - "loss": 0.5325, - "step": 183510 - }, - { - "epoch": 1.6223766332502343, - "grad_norm": 2.291339159011841, - "learning_rate": 2.296038944582943e-05, - "loss": 0.5877, - "step": 183520 - }, - { - "epoch": 1.6224650365105466, - "grad_norm": 2.2934045791625977, - "learning_rate": 2.295891605815756e-05, - "loss": 0.6308, - "step": 183530 - }, - { - "epoch": 1.6225534397708588, - "grad_norm": 2.509108781814575, - "learning_rate": 2.2957442670485687e-05, - "loss": 0.5279, - "step": 183540 - }, - { - "epoch": 1.6226418430311709, - "grad_norm": 2.1688010692596436, - "learning_rate": 2.295596928281382e-05, - "loss": 0.537, - "step": 183550 - }, - { - "epoch": 1.6227302462914832, - "grad_norm": 3.1773228645324707, - "learning_rate": 2.2954495895141947e-05, - "loss": 0.5855, - "step": 183560 - }, - { - "epoch": 1.6228186495517956, - "grad_norm": 1.686359167098999, - "learning_rate": 2.2953022507470075e-05, - "loss": 0.5335, - "step": 183570 - }, - { - "epoch": 1.6229070528121077, - "grad_norm": 2.2279937267303467, - "learning_rate": 2.2951549119798204e-05, - "loss": 0.6343, - "step": 183580 - }, - { - "epoch": 1.6229954560724198, - "grad_norm": 2.9656448364257812, - "learning_rate": 2.2950075732126335e-05, - "loss": 0.6324, - "step": 183590 - }, - { - "epoch": 1.6230838593327321, - "grad_norm": 31.457103729248047, - "learning_rate": 2.2948602344454464e-05, - "loss": 0.6671, - "step": 183600 - }, - { - "epoch": 1.6231722625930445, - "grad_norm": 2.75780987739563, - "learning_rate": 2.2947128956782592e-05, - "loss": 0.7377, - "step": 183610 - }, - { - "epoch": 1.6232606658533566, - "grad_norm": 1.5926953554153442, - "learning_rate": 2.2945655569110724e-05, - "loss": 0.603, - "step": 183620 - }, - { - "epoch": 1.623349069113669, - "grad_norm": 3.640963554382324, - "learning_rate": 2.2944182181438852e-05, - "loss": 0.583, - "step": 183630 - }, - { - "epoch": 1.6234374723739813, - "grad_norm": 2.1036155223846436, - "learning_rate": 2.294270879376698e-05, - "loss": 0.5616, - "step": 183640 - }, - { - "epoch": 1.6235258756342934, - "grad_norm": 4.902594089508057, - "learning_rate": 2.294123540609511e-05, - "loss": 0.6791, - "step": 183650 - }, - { - "epoch": 1.6236142788946055, - "grad_norm": 1.7234135866165161, - "learning_rate": 2.293976201842324e-05, - "loss": 0.6306, - "step": 183660 - }, - { - "epoch": 1.6237026821549179, - "grad_norm": 3.0011208057403564, - "learning_rate": 2.293828863075137e-05, - "loss": 0.5174, - "step": 183670 - }, - { - "epoch": 1.6237910854152302, - "grad_norm": 4.035562992095947, - "learning_rate": 2.2936815243079497e-05, - "loss": 0.4826, - "step": 183680 - }, - { - "epoch": 1.6238794886755423, - "grad_norm": 1.7754682302474976, - "learning_rate": 2.293534185540763e-05, - "loss": 0.6573, - "step": 183690 - }, - { - "epoch": 1.6239678919358544, - "grad_norm": 0.8616647720336914, - "learning_rate": 2.2933868467735757e-05, - "loss": 0.6085, - "step": 183700 - }, - { - "epoch": 1.6240562951961668, - "grad_norm": 4.318363666534424, - "learning_rate": 2.2932395080063886e-05, - "loss": 0.6725, - "step": 183710 - }, - { - "epoch": 1.6241446984564791, - "grad_norm": 2.096954345703125, - "learning_rate": 2.2930921692392014e-05, - "loss": 0.5702, - "step": 183720 - }, - { - "epoch": 1.6242331017167912, - "grad_norm": 4.886160373687744, - "learning_rate": 2.2929448304720146e-05, - "loss": 0.5545, - "step": 183730 - }, - { - "epoch": 1.6243215049771036, - "grad_norm": 1.7509719133377075, - "learning_rate": 2.2927974917048274e-05, - "loss": 0.6568, - "step": 183740 - }, - { - "epoch": 1.624409908237416, - "grad_norm": 1.8688541650772095, - "learning_rate": 2.2926501529376403e-05, - "loss": 0.7127, - "step": 183750 - }, - { - "epoch": 1.624498311497728, - "grad_norm": 1.086142659187317, - "learning_rate": 2.2925028141704534e-05, - "loss": 0.4725, - "step": 183760 - }, - { - "epoch": 1.6245867147580402, - "grad_norm": 5.558563709259033, - "learning_rate": 2.2923554754032663e-05, - "loss": 0.6067, - "step": 183770 - }, - { - "epoch": 1.6246751180183525, - "grad_norm": 2.00053071975708, - "learning_rate": 2.292208136636079e-05, - "loss": 0.5779, - "step": 183780 - }, - { - "epoch": 1.6247635212786649, - "grad_norm": 2.470217227935791, - "learning_rate": 2.2920607978688923e-05, - "loss": 0.6558, - "step": 183790 - }, - { - "epoch": 1.624851924538977, - "grad_norm": 1.4910800457000732, - "learning_rate": 2.291913459101705e-05, - "loss": 0.5049, - "step": 183800 - }, - { - "epoch": 1.624940327799289, - "grad_norm": 3.218100070953369, - "learning_rate": 2.291766120334518e-05, - "loss": 0.5279, - "step": 183810 - }, - { - "epoch": 1.6250287310596014, - "grad_norm": 4.51097297668457, - "learning_rate": 2.291618781567331e-05, - "loss": 0.6609, - "step": 183820 - }, - { - "epoch": 1.6251171343199138, - "grad_norm": 3.820086717605591, - "learning_rate": 2.291471442800144e-05, - "loss": 0.6391, - "step": 183830 - }, - { - "epoch": 1.625205537580226, - "grad_norm": 2.5522615909576416, - "learning_rate": 2.2913241040329568e-05, - "loss": 0.6297, - "step": 183840 - }, - { - "epoch": 1.6252939408405382, - "grad_norm": 1.307796597480774, - "learning_rate": 2.29117676526577e-05, - "loss": 0.6185, - "step": 183850 - }, - { - "epoch": 1.6253823441008506, - "grad_norm": 5.658438682556152, - "learning_rate": 2.2910294264985828e-05, - "loss": 0.765, - "step": 183860 - }, - { - "epoch": 1.6254707473611627, - "grad_norm": 2.9527199268341064, - "learning_rate": 2.2908820877313956e-05, - "loss": 0.5706, - "step": 183870 - }, - { - "epoch": 1.6255591506214748, - "grad_norm": 6.69216775894165, - "learning_rate": 2.2907347489642088e-05, - "loss": 0.5828, - "step": 183880 - }, - { - "epoch": 1.6256475538817872, - "grad_norm": 2.156167984008789, - "learning_rate": 2.2905874101970216e-05, - "loss": 0.5231, - "step": 183890 - }, - { - "epoch": 1.6257359571420995, - "grad_norm": 18.370405197143555, - "learning_rate": 2.2904400714298345e-05, - "loss": 0.7013, - "step": 183900 - }, - { - "epoch": 1.6258243604024116, - "grad_norm": 7.3644609451293945, - "learning_rate": 2.2902927326626477e-05, - "loss": 0.6461, - "step": 183910 - }, - { - "epoch": 1.6259127636627237, - "grad_norm": 5.743333339691162, - "learning_rate": 2.2901453938954605e-05, - "loss": 0.6395, - "step": 183920 - }, - { - "epoch": 1.626001166923036, - "grad_norm": 1.135197401046753, - "learning_rate": 2.2899980551282733e-05, - "loss": 0.6282, - "step": 183930 - }, - { - "epoch": 1.6260895701833484, - "grad_norm": 1.5497599840164185, - "learning_rate": 2.289850716361086e-05, - "loss": 0.5932, - "step": 183940 - }, - { - "epoch": 1.6261779734436606, - "grad_norm": 1.3106706142425537, - "learning_rate": 2.2897033775938993e-05, - "loss": 0.624, - "step": 183950 - }, - { - "epoch": 1.626266376703973, - "grad_norm": 1.3661928176879883, - "learning_rate": 2.289556038826712e-05, - "loss": 0.6013, - "step": 183960 - }, - { - "epoch": 1.6263547799642852, - "grad_norm": 1.613750696182251, - "learning_rate": 2.289408700059525e-05, - "loss": 0.5678, - "step": 183970 - }, - { - "epoch": 1.6264431832245974, - "grad_norm": 1.8886228799819946, - "learning_rate": 2.2892613612923382e-05, - "loss": 0.6511, - "step": 183980 - }, - { - "epoch": 1.6265315864849095, - "grad_norm": 3.687861204147339, - "learning_rate": 2.289114022525151e-05, - "loss": 0.5881, - "step": 183990 - }, - { - "epoch": 1.6266199897452218, - "grad_norm": 1.4639226198196411, - "learning_rate": 2.288966683757964e-05, - "loss": 0.6218, - "step": 184000 - }, - { - "epoch": 1.6267083930055342, - "grad_norm": 0.9775075316429138, - "learning_rate": 2.2888193449907767e-05, - "loss": 0.614, - "step": 184010 - }, - { - "epoch": 1.6267967962658463, - "grad_norm": 4.130499839782715, - "learning_rate": 2.28867200622359e-05, - "loss": 0.5666, - "step": 184020 - }, - { - "epoch": 1.6268851995261584, - "grad_norm": 7.859674453735352, - "learning_rate": 2.2885246674564027e-05, - "loss": 0.6281, - "step": 184030 - }, - { - "epoch": 1.6269736027864707, - "grad_norm": 0.9946154952049255, - "learning_rate": 2.2883773286892155e-05, - "loss": 0.5167, - "step": 184040 - }, - { - "epoch": 1.627062006046783, - "grad_norm": 5.320554733276367, - "learning_rate": 2.2882299899220284e-05, - "loss": 0.6229, - "step": 184050 - }, - { - "epoch": 1.6271504093070952, - "grad_norm": 1.5439127683639526, - "learning_rate": 2.2880826511548415e-05, - "loss": 0.576, - "step": 184060 - }, - { - "epoch": 1.6272388125674073, - "grad_norm": 6.699517250061035, - "learning_rate": 2.2879353123876544e-05, - "loss": 0.5049, - "step": 184070 - }, - { - "epoch": 1.6273272158277199, - "grad_norm": 2.4477782249450684, - "learning_rate": 2.2877879736204672e-05, - "loss": 0.4145, - "step": 184080 - }, - { - "epoch": 1.627415619088032, - "grad_norm": 3.1822073459625244, - "learning_rate": 2.2876406348532804e-05, - "loss": 0.6304, - "step": 184090 - }, - { - "epoch": 1.6275040223483441, - "grad_norm": 9.732050895690918, - "learning_rate": 2.2874932960860932e-05, - "loss": 0.773, - "step": 184100 - }, - { - "epoch": 1.6275924256086565, - "grad_norm": 1.589491844177246, - "learning_rate": 2.287345957318906e-05, - "loss": 0.7218, - "step": 184110 - }, - { - "epoch": 1.6276808288689688, - "grad_norm": 5.9442830085754395, - "learning_rate": 2.287198618551719e-05, - "loss": 0.665, - "step": 184120 - }, - { - "epoch": 1.627769232129281, - "grad_norm": 1.9216110706329346, - "learning_rate": 2.287051279784532e-05, - "loss": 0.6914, - "step": 184130 - }, - { - "epoch": 1.627857635389593, - "grad_norm": 2.534881591796875, - "learning_rate": 2.286903941017345e-05, - "loss": 0.6321, - "step": 184140 - }, - { - "epoch": 1.6279460386499054, - "grad_norm": 9.885744094848633, - "learning_rate": 2.2867566022501577e-05, - "loss": 0.6662, - "step": 184150 - }, - { - "epoch": 1.6280344419102177, - "grad_norm": 1.8704266548156738, - "learning_rate": 2.286609263482971e-05, - "loss": 0.6705, - "step": 184160 - }, - { - "epoch": 1.6281228451705299, - "grad_norm": 0.8257664442062378, - "learning_rate": 2.2864619247157837e-05, - "loss": 0.565, - "step": 184170 - }, - { - "epoch": 1.628211248430842, - "grad_norm": 4.256963729858398, - "learning_rate": 2.2863145859485966e-05, - "loss": 0.5459, - "step": 184180 - }, - { - "epoch": 1.6282996516911543, - "grad_norm": 1.2728620767593384, - "learning_rate": 2.2861672471814094e-05, - "loss": 0.7425, - "step": 184190 - }, - { - "epoch": 1.6283880549514667, - "grad_norm": 12.854273796081543, - "learning_rate": 2.2860199084142226e-05, - "loss": 0.5235, - "step": 184200 - }, - { - "epoch": 1.6284764582117788, - "grad_norm": 1.6517729759216309, - "learning_rate": 2.2858725696470354e-05, - "loss": 0.5652, - "step": 184210 - }, - { - "epoch": 1.6285648614720911, - "grad_norm": 1.1881617307662964, - "learning_rate": 2.2857252308798483e-05, - "loss": 0.5657, - "step": 184220 - }, - { - "epoch": 1.6286532647324035, - "grad_norm": 5.583603858947754, - "learning_rate": 2.285577892112661e-05, - "loss": 0.6541, - "step": 184230 - }, - { - "epoch": 1.6287416679927156, - "grad_norm": 2.6169891357421875, - "learning_rate": 2.2854305533454743e-05, - "loss": 0.6686, - "step": 184240 - }, - { - "epoch": 1.6288300712530277, - "grad_norm": 1.1197354793548584, - "learning_rate": 2.285283214578287e-05, - "loss": 0.7222, - "step": 184250 - }, - { - "epoch": 1.62891847451334, - "grad_norm": 1.1425940990447998, - "learning_rate": 2.2851358758111e-05, - "loss": 0.6223, - "step": 184260 - }, - { - "epoch": 1.6290068777736524, - "grad_norm": 5.089562892913818, - "learning_rate": 2.284988537043913e-05, - "loss": 0.5669, - "step": 184270 - }, - { - "epoch": 1.6290952810339645, - "grad_norm": 3.5676016807556152, - "learning_rate": 2.284841198276726e-05, - "loss": 0.6697, - "step": 184280 - }, - { - "epoch": 1.6291836842942766, - "grad_norm": 2.2371551990509033, - "learning_rate": 2.2846938595095388e-05, - "loss": 0.6177, - "step": 184290 - }, - { - "epoch": 1.629272087554589, - "grad_norm": 1.5458804368972778, - "learning_rate": 2.2845465207423516e-05, - "loss": 0.493, - "step": 184300 - }, - { - "epoch": 1.6293604908149013, - "grad_norm": 31.324037551879883, - "learning_rate": 2.2843991819751648e-05, - "loss": 0.4658, - "step": 184310 - }, - { - "epoch": 1.6294488940752134, - "grad_norm": 2.5619683265686035, - "learning_rate": 2.2842518432079776e-05, - "loss": 0.6893, - "step": 184320 - }, - { - "epoch": 1.6295372973355258, - "grad_norm": 7.988354682922363, - "learning_rate": 2.2841045044407905e-05, - "loss": 0.4634, - "step": 184330 - }, - { - "epoch": 1.6296257005958381, - "grad_norm": 2.203214406967163, - "learning_rate": 2.2839571656736033e-05, - "loss": 0.5366, - "step": 184340 - }, - { - "epoch": 1.6297141038561502, - "grad_norm": 6.620633602142334, - "learning_rate": 2.2838098269064165e-05, - "loss": 0.5371, - "step": 184350 - }, - { - "epoch": 1.6298025071164624, - "grad_norm": 6.454627990722656, - "learning_rate": 2.2836624881392293e-05, - "loss": 0.624, - "step": 184360 - }, - { - "epoch": 1.6298909103767747, - "grad_norm": 1.9121379852294922, - "learning_rate": 2.283515149372042e-05, - "loss": 0.5148, - "step": 184370 - }, - { - "epoch": 1.629979313637087, - "grad_norm": 2.0181052684783936, - "learning_rate": 2.2833678106048553e-05, - "loss": 0.5305, - "step": 184380 - }, - { - "epoch": 1.6300677168973992, - "grad_norm": 4.650597095489502, - "learning_rate": 2.283220471837668e-05, - "loss": 0.669, - "step": 184390 - }, - { - "epoch": 1.6301561201577113, - "grad_norm": 2.9797658920288086, - "learning_rate": 2.283073133070481e-05, - "loss": 0.7286, - "step": 184400 - }, - { - "epoch": 1.6302445234180236, - "grad_norm": 1.9700895547866821, - "learning_rate": 2.2829257943032938e-05, - "loss": 0.577, - "step": 184410 - }, - { - "epoch": 1.630332926678336, - "grad_norm": 2.63667893409729, - "learning_rate": 2.282778455536107e-05, - "loss": 0.5794, - "step": 184420 - }, - { - "epoch": 1.630421329938648, - "grad_norm": 1.9961471557617188, - "learning_rate": 2.28263111676892e-05, - "loss": 0.69, - "step": 184430 - }, - { - "epoch": 1.6305097331989604, - "grad_norm": 3.482682943344116, - "learning_rate": 2.2824837780017327e-05, - "loss": 0.6993, - "step": 184440 - }, - { - "epoch": 1.6305981364592728, - "grad_norm": 2.6156997680664062, - "learning_rate": 2.282336439234546e-05, - "loss": 0.5546, - "step": 184450 - }, - { - "epoch": 1.6306865397195849, - "grad_norm": 2.978020429611206, - "learning_rate": 2.2821891004673587e-05, - "loss": 0.5797, - "step": 184460 - }, - { - "epoch": 1.630774942979897, - "grad_norm": 3.4677491188049316, - "learning_rate": 2.2820417617001715e-05, - "loss": 0.6331, - "step": 184470 - }, - { - "epoch": 1.6308633462402093, - "grad_norm": 1.6658741235733032, - "learning_rate": 2.2818944229329844e-05, - "loss": 0.6836, - "step": 184480 - }, - { - "epoch": 1.6309517495005217, - "grad_norm": 1.7685045003890991, - "learning_rate": 2.2817470841657975e-05, - "loss": 0.5463, - "step": 184490 - }, - { - "epoch": 1.6310401527608338, - "grad_norm": 8.338022232055664, - "learning_rate": 2.2815997453986104e-05, - "loss": 0.6075, - "step": 184500 - }, - { - "epoch": 1.631128556021146, - "grad_norm": 3.6579251289367676, - "learning_rate": 2.2814524066314232e-05, - "loss": 0.6168, - "step": 184510 - }, - { - "epoch": 1.6312169592814583, - "grad_norm": 5.747318744659424, - "learning_rate": 2.281305067864236e-05, - "loss": 0.6771, - "step": 184520 - }, - { - "epoch": 1.6313053625417706, - "grad_norm": 3.40767502784729, - "learning_rate": 2.2811577290970492e-05, - "loss": 0.5803, - "step": 184530 - }, - { - "epoch": 1.6313937658020827, - "grad_norm": 3.503166437149048, - "learning_rate": 2.281010390329862e-05, - "loss": 0.5794, - "step": 184540 - }, - { - "epoch": 1.631482169062395, - "grad_norm": 2.796386480331421, - "learning_rate": 2.280863051562675e-05, - "loss": 0.7916, - "step": 184550 - }, - { - "epoch": 1.6315705723227074, - "grad_norm": 4.32317590713501, - "learning_rate": 2.280715712795488e-05, - "loss": 0.6251, - "step": 184560 - }, - { - "epoch": 1.6316589755830195, - "grad_norm": 1.8309497833251953, - "learning_rate": 2.280568374028301e-05, - "loss": 0.6394, - "step": 184570 - }, - { - "epoch": 1.6317473788433317, - "grad_norm": 4.816558837890625, - "learning_rate": 2.2804210352611137e-05, - "loss": 0.4691, - "step": 184580 - }, - { - "epoch": 1.631835782103644, - "grad_norm": 2.5757365226745605, - "learning_rate": 2.2802736964939266e-05, - "loss": 0.5826, - "step": 184590 - }, - { - "epoch": 1.6319241853639563, - "grad_norm": 3.247929334640503, - "learning_rate": 2.2801263577267397e-05, - "loss": 0.5432, - "step": 184600 - }, - { - "epoch": 1.6320125886242685, - "grad_norm": 1.2992980480194092, - "learning_rate": 2.2799790189595526e-05, - "loss": 0.5994, - "step": 184610 - }, - { - "epoch": 1.6321009918845806, - "grad_norm": 4.61171817779541, - "learning_rate": 2.2798316801923654e-05, - "loss": 0.6519, - "step": 184620 - }, - { - "epoch": 1.632189395144893, - "grad_norm": 3.0313308238983154, - "learning_rate": 2.2796843414251786e-05, - "loss": 0.6346, - "step": 184630 - }, - { - "epoch": 1.6322777984052053, - "grad_norm": 12.606894493103027, - "learning_rate": 2.2795370026579914e-05, - "loss": 0.5395, - "step": 184640 - }, - { - "epoch": 1.6323662016655174, - "grad_norm": 5.260139465332031, - "learning_rate": 2.2793896638908042e-05, - "loss": 0.589, - "step": 184650 - }, - { - "epoch": 1.6324546049258295, - "grad_norm": 1.856351375579834, - "learning_rate": 2.279242325123617e-05, - "loss": 0.5267, - "step": 184660 - }, - { - "epoch": 1.632543008186142, - "grad_norm": 0.9601485133171082, - "learning_rate": 2.2790949863564303e-05, - "loss": 0.4933, - "step": 184670 - }, - { - "epoch": 1.6326314114464542, - "grad_norm": 3.705890655517578, - "learning_rate": 2.278947647589243e-05, - "loss": 0.5195, - "step": 184680 - }, - { - "epoch": 1.6327198147067663, - "grad_norm": 3.7225093841552734, - "learning_rate": 2.278800308822056e-05, - "loss": 0.5904, - "step": 184690 - }, - { - "epoch": 1.6328082179670786, - "grad_norm": 1.198681116104126, - "learning_rate": 2.278652970054869e-05, - "loss": 0.5897, - "step": 184700 - }, - { - "epoch": 1.632896621227391, - "grad_norm": 2.5294363498687744, - "learning_rate": 2.278505631287682e-05, - "loss": 0.5875, - "step": 184710 - }, - { - "epoch": 1.632985024487703, - "grad_norm": 1.3712669610977173, - "learning_rate": 2.2783582925204948e-05, - "loss": 0.558, - "step": 184720 - }, - { - "epoch": 1.6330734277480152, - "grad_norm": 1.1586508750915527, - "learning_rate": 2.278210953753308e-05, - "loss": 0.5094, - "step": 184730 - }, - { - "epoch": 1.6331618310083276, - "grad_norm": 7.613958358764648, - "learning_rate": 2.2780636149861208e-05, - "loss": 0.5836, - "step": 184740 - }, - { - "epoch": 1.63325023426864, - "grad_norm": 2.813015937805176, - "learning_rate": 2.2779162762189336e-05, - "loss": 0.5914, - "step": 184750 - }, - { - "epoch": 1.633338637528952, - "grad_norm": 1.207080602645874, - "learning_rate": 2.2777689374517468e-05, - "loss": 0.567, - "step": 184760 - }, - { - "epoch": 1.6334270407892642, - "grad_norm": 3.395878553390503, - "learning_rate": 2.2776215986845596e-05, - "loss": 0.6267, - "step": 184770 - }, - { - "epoch": 1.6335154440495767, - "grad_norm": 1.9244418144226074, - "learning_rate": 2.2774742599173725e-05, - "loss": 0.6235, - "step": 184780 - }, - { - "epoch": 1.6336038473098888, - "grad_norm": 2.774043560028076, - "learning_rate": 2.2773269211501856e-05, - "loss": 0.6738, - "step": 184790 - }, - { - "epoch": 1.633692250570201, - "grad_norm": 1.2514560222625732, - "learning_rate": 2.2771795823829985e-05, - "loss": 0.4984, - "step": 184800 - }, - { - "epoch": 1.6337806538305133, - "grad_norm": 5.4205241203308105, - "learning_rate": 2.2770322436158113e-05, - "loss": 0.4232, - "step": 184810 - }, - { - "epoch": 1.6338690570908256, - "grad_norm": 2.8734962940216064, - "learning_rate": 2.2768849048486245e-05, - "loss": 0.6807, - "step": 184820 - }, - { - "epoch": 1.6339574603511378, - "grad_norm": 1.3748440742492676, - "learning_rate": 2.2767375660814373e-05, - "loss": 0.5818, - "step": 184830 - }, - { - "epoch": 1.6340458636114499, - "grad_norm": 5.2768731117248535, - "learning_rate": 2.27659022731425e-05, - "loss": 0.6752, - "step": 184840 - }, - { - "epoch": 1.6341342668717622, - "grad_norm": 2.147465944290161, - "learning_rate": 2.2764428885470633e-05, - "loss": 0.6051, - "step": 184850 - }, - { - "epoch": 1.6342226701320746, - "grad_norm": 1.3552194833755493, - "learning_rate": 2.276295549779876e-05, - "loss": 0.6242, - "step": 184860 - }, - { - "epoch": 1.6343110733923867, - "grad_norm": 3.005063056945801, - "learning_rate": 2.276148211012689e-05, - "loss": 0.546, - "step": 184870 - }, - { - "epoch": 1.6343994766526988, - "grad_norm": 2.38718581199646, - "learning_rate": 2.276000872245502e-05, - "loss": 0.6354, - "step": 184880 - }, - { - "epoch": 1.6344878799130111, - "grad_norm": 2.210580348968506, - "learning_rate": 2.275853533478315e-05, - "loss": 0.4162, - "step": 184890 - }, - { - "epoch": 1.6345762831733235, - "grad_norm": 1.0004842281341553, - "learning_rate": 2.275706194711128e-05, - "loss": 0.6462, - "step": 184900 - }, - { - "epoch": 1.6346646864336356, - "grad_norm": 2.734127998352051, - "learning_rate": 2.2755588559439407e-05, - "loss": 0.7612, - "step": 184910 - }, - { - "epoch": 1.634753089693948, - "grad_norm": 2.2676632404327393, - "learning_rate": 2.275411517176754e-05, - "loss": 0.6075, - "step": 184920 - }, - { - "epoch": 1.6348414929542603, - "grad_norm": 1.2737714052200317, - "learning_rate": 2.2752641784095667e-05, - "loss": 0.5965, - "step": 184930 - }, - { - "epoch": 1.6349298962145724, - "grad_norm": 3.9585015773773193, - "learning_rate": 2.2751168396423795e-05, - "loss": 0.6548, - "step": 184940 - }, - { - "epoch": 1.6350182994748845, - "grad_norm": 3.6612370014190674, - "learning_rate": 2.2749695008751924e-05, - "loss": 0.5771, - "step": 184950 - }, - { - "epoch": 1.6351067027351969, - "grad_norm": 2.543405055999756, - "learning_rate": 2.2748221621080055e-05, - "loss": 0.5941, - "step": 184960 - }, - { - "epoch": 1.6351951059955092, - "grad_norm": 5.730416297912598, - "learning_rate": 2.2746748233408184e-05, - "loss": 0.5093, - "step": 184970 - }, - { - "epoch": 1.6352835092558213, - "grad_norm": 3.4625625610351562, - "learning_rate": 2.2745274845736312e-05, - "loss": 0.7759, - "step": 184980 - }, - { - "epoch": 1.6353719125161335, - "grad_norm": 1.772454023361206, - "learning_rate": 2.274380145806444e-05, - "loss": 0.5607, - "step": 184990 - }, - { - "epoch": 1.6354603157764458, - "grad_norm": 1.370222806930542, - "learning_rate": 2.2742328070392572e-05, - "loss": 0.5706, - "step": 185000 - }, - { - "epoch": 1.6355487190367581, - "grad_norm": 3.0200443267822266, - "learning_rate": 2.27408546827207e-05, - "loss": 0.6513, - "step": 185010 - }, - { - "epoch": 1.6356371222970703, - "grad_norm": 7.69234037399292, - "learning_rate": 2.273938129504883e-05, - "loss": 0.5365, - "step": 185020 - }, - { - "epoch": 1.6357255255573826, - "grad_norm": 1.9520457983016968, - "learning_rate": 2.273790790737696e-05, - "loss": 0.6405, - "step": 185030 - }, - { - "epoch": 1.635813928817695, - "grad_norm": 2.9949679374694824, - "learning_rate": 2.273643451970509e-05, - "loss": 0.5321, - "step": 185040 - }, - { - "epoch": 1.635902332078007, - "grad_norm": 2.275728702545166, - "learning_rate": 2.2734961132033217e-05, - "loss": 0.4874, - "step": 185050 - }, - { - "epoch": 1.6359907353383192, - "grad_norm": 6.908626556396484, - "learning_rate": 2.2733487744361346e-05, - "loss": 0.6911, - "step": 185060 - }, - { - "epoch": 1.6360791385986315, - "grad_norm": 1.3334299325942993, - "learning_rate": 2.2732014356689477e-05, - "loss": 0.564, - "step": 185070 - }, - { - "epoch": 1.6361675418589439, - "grad_norm": 5.897999286651611, - "learning_rate": 2.2730540969017606e-05, - "loss": 0.6266, - "step": 185080 - }, - { - "epoch": 1.636255945119256, - "grad_norm": 1.4176234006881714, - "learning_rate": 2.2729067581345734e-05, - "loss": 0.4531, - "step": 185090 - }, - { - "epoch": 1.636344348379568, - "grad_norm": 3.0399603843688965, - "learning_rate": 2.2727594193673866e-05, - "loss": 0.6749, - "step": 185100 - }, - { - "epoch": 1.6364327516398804, - "grad_norm": 1.9612928628921509, - "learning_rate": 2.2726120806001994e-05, - "loss": 0.6035, - "step": 185110 - }, - { - "epoch": 1.6365211549001928, - "grad_norm": 5.286875247955322, - "learning_rate": 2.2724647418330123e-05, - "loss": 0.5198, - "step": 185120 - }, - { - "epoch": 1.636609558160505, - "grad_norm": 1.482245683670044, - "learning_rate": 2.272317403065825e-05, - "loss": 0.6298, - "step": 185130 - }, - { - "epoch": 1.6366979614208172, - "grad_norm": 2.0642032623291016, - "learning_rate": 2.2721700642986383e-05, - "loss": 0.6343, - "step": 185140 - }, - { - "epoch": 1.6367863646811296, - "grad_norm": 1.313356876373291, - "learning_rate": 2.272022725531451e-05, - "loss": 0.642, - "step": 185150 - }, - { - "epoch": 1.6368747679414417, - "grad_norm": 3.3046481609344482, - "learning_rate": 2.271875386764264e-05, - "loss": 0.5257, - "step": 185160 - }, - { - "epoch": 1.6369631712017538, - "grad_norm": 1.4577585458755493, - "learning_rate": 2.2717280479970768e-05, - "loss": 0.6171, - "step": 185170 - }, - { - "epoch": 1.6370515744620662, - "grad_norm": 2.3187859058380127, - "learning_rate": 2.27158070922989e-05, - "loss": 0.6, - "step": 185180 - }, - { - "epoch": 1.6371399777223785, - "grad_norm": 1.291504979133606, - "learning_rate": 2.2714333704627028e-05, - "loss": 0.6561, - "step": 185190 - }, - { - "epoch": 1.6372283809826906, - "grad_norm": 5.608502388000488, - "learning_rate": 2.2712860316955156e-05, - "loss": 0.6199, - "step": 185200 - }, - { - "epoch": 1.6373167842430028, - "grad_norm": 4.817999362945557, - "learning_rate": 2.2711386929283288e-05, - "loss": 0.788, - "step": 185210 - }, - { - "epoch": 1.637405187503315, - "grad_norm": 1.6362338066101074, - "learning_rate": 2.2709913541611416e-05, - "loss": 0.5305, - "step": 185220 - }, - { - "epoch": 1.6374935907636274, - "grad_norm": 3.609710693359375, - "learning_rate": 2.2708440153939545e-05, - "loss": 0.648, - "step": 185230 - }, - { - "epoch": 1.6375819940239396, - "grad_norm": 9.4567289352417, - "learning_rate": 2.2706966766267673e-05, - "loss": 0.6739, - "step": 185240 - }, - { - "epoch": 1.6376703972842517, - "grad_norm": 8.18065071105957, - "learning_rate": 2.2705493378595805e-05, - "loss": 0.6428, - "step": 185250 - }, - { - "epoch": 1.6377588005445642, - "grad_norm": 14.607341766357422, - "learning_rate": 2.2704019990923933e-05, - "loss": 0.5551, - "step": 185260 - }, - { - "epoch": 1.6378472038048764, - "grad_norm": 2.1119654178619385, - "learning_rate": 2.270254660325206e-05, - "loss": 0.6007, - "step": 185270 - }, - { - "epoch": 1.6379356070651885, - "grad_norm": 8.281624794006348, - "learning_rate": 2.2701073215580193e-05, - "loss": 0.6458, - "step": 185280 - }, - { - "epoch": 1.6380240103255008, - "grad_norm": 1.4578211307525635, - "learning_rate": 2.269959982790832e-05, - "loss": 0.6117, - "step": 185290 - }, - { - "epoch": 1.6381124135858132, - "grad_norm": 10.856989860534668, - "learning_rate": 2.269812644023645e-05, - "loss": 0.5313, - "step": 185300 - }, - { - "epoch": 1.6382008168461253, - "grad_norm": 1.363480806350708, - "learning_rate": 2.2696653052564578e-05, - "loss": 0.6278, - "step": 185310 - }, - { - "epoch": 1.6382892201064374, - "grad_norm": 3.4219608306884766, - "learning_rate": 2.269517966489271e-05, - "loss": 0.67, - "step": 185320 - }, - { - "epoch": 1.6383776233667497, - "grad_norm": 3.406155586242676, - "learning_rate": 2.269370627722084e-05, - "loss": 0.6604, - "step": 185330 - }, - { - "epoch": 1.638466026627062, - "grad_norm": 3.8006746768951416, - "learning_rate": 2.2692232889548967e-05, - "loss": 0.5872, - "step": 185340 - }, - { - "epoch": 1.6385544298873742, - "grad_norm": 1.6638083457946777, - "learning_rate": 2.2690759501877095e-05, - "loss": 0.5811, - "step": 185350 - }, - { - "epoch": 1.6386428331476863, - "grad_norm": 4.976661205291748, - "learning_rate": 2.2689286114205227e-05, - "loss": 0.5568, - "step": 185360 - }, - { - "epoch": 1.638731236407999, - "grad_norm": 4.77788782119751, - "learning_rate": 2.2687812726533355e-05, - "loss": 0.5337, - "step": 185370 - }, - { - "epoch": 1.638819639668311, - "grad_norm": 2.5900914669036865, - "learning_rate": 2.2686339338861483e-05, - "loss": 0.5267, - "step": 185380 - }, - { - "epoch": 1.6389080429286231, - "grad_norm": 2.8252573013305664, - "learning_rate": 2.2684865951189615e-05, - "loss": 0.6344, - "step": 185390 - }, - { - "epoch": 1.6389964461889355, - "grad_norm": 1.5785282850265503, - "learning_rate": 2.2683392563517744e-05, - "loss": 0.5995, - "step": 185400 - }, - { - "epoch": 1.6390848494492478, - "grad_norm": 9.918407440185547, - "learning_rate": 2.2681919175845872e-05, - "loss": 0.7484, - "step": 185410 - }, - { - "epoch": 1.63917325270956, - "grad_norm": 3.0052177906036377, - "learning_rate": 2.2680445788174e-05, - "loss": 0.6407, - "step": 185420 - }, - { - "epoch": 1.639261655969872, - "grad_norm": 1.9721765518188477, - "learning_rate": 2.2678972400502132e-05, - "loss": 0.5394, - "step": 185430 - }, - { - "epoch": 1.6393500592301844, - "grad_norm": 2.792327880859375, - "learning_rate": 2.267749901283026e-05, - "loss": 0.7258, - "step": 185440 - }, - { - "epoch": 1.6394384624904967, - "grad_norm": 2.254916191101074, - "learning_rate": 2.267602562515839e-05, - "loss": 0.6591, - "step": 185450 - }, - { - "epoch": 1.6395268657508089, - "grad_norm": 2.5821938514709473, - "learning_rate": 2.2674552237486517e-05, - "loss": 0.5923, - "step": 185460 - }, - { - "epoch": 1.639615269011121, - "grad_norm": 2.5696825981140137, - "learning_rate": 2.267307884981465e-05, - "loss": 0.5828, - "step": 185470 - }, - { - "epoch": 1.6397036722714333, - "grad_norm": 3.0272023677825928, - "learning_rate": 2.2671605462142777e-05, - "loss": 0.6408, - "step": 185480 - }, - { - "epoch": 1.6397920755317457, - "grad_norm": 9.991312026977539, - "learning_rate": 2.2670132074470906e-05, - "loss": 0.582, - "step": 185490 - }, - { - "epoch": 1.6398804787920578, - "grad_norm": 2.480916976928711, - "learning_rate": 2.2668658686799037e-05, - "loss": 0.5271, - "step": 185500 - }, - { - "epoch": 1.6399688820523701, - "grad_norm": 1.6791179180145264, - "learning_rate": 2.2667185299127166e-05, - "loss": 0.6126, - "step": 185510 - }, - { - "epoch": 1.6400572853126825, - "grad_norm": 1.5378644466400146, - "learning_rate": 2.2665711911455294e-05, - "loss": 0.6285, - "step": 185520 - }, - { - "epoch": 1.6401456885729946, - "grad_norm": 1.2760668992996216, - "learning_rate": 2.2664238523783422e-05, - "loss": 0.7315, - "step": 185530 - }, - { - "epoch": 1.6402340918333067, - "grad_norm": 3.544977903366089, - "learning_rate": 2.2662765136111554e-05, - "loss": 0.6292, - "step": 185540 - }, - { - "epoch": 1.640322495093619, - "grad_norm": 7.1313676834106445, - "learning_rate": 2.2661291748439682e-05, - "loss": 0.5463, - "step": 185550 - }, - { - "epoch": 1.6404108983539314, - "grad_norm": 1.6529300212860107, - "learning_rate": 2.265981836076781e-05, - "loss": 0.5482, - "step": 185560 - }, - { - "epoch": 1.6404993016142435, - "grad_norm": 2.0977251529693604, - "learning_rate": 2.2658344973095943e-05, - "loss": 0.5639, - "step": 185570 - }, - { - "epoch": 1.6405877048745556, - "grad_norm": 1.8324156999588013, - "learning_rate": 2.265687158542407e-05, - "loss": 0.5793, - "step": 185580 - }, - { - "epoch": 1.640676108134868, - "grad_norm": 1.737492561340332, - "learning_rate": 2.26553981977522e-05, - "loss": 0.5701, - "step": 185590 - }, - { - "epoch": 1.6407645113951803, - "grad_norm": 3.78633975982666, - "learning_rate": 2.2653924810080328e-05, - "loss": 0.7726, - "step": 185600 - }, - { - "epoch": 1.6408529146554924, - "grad_norm": 2.2083418369293213, - "learning_rate": 2.265245142240846e-05, - "loss": 0.5441, - "step": 185610 - }, - { - "epoch": 1.6409413179158048, - "grad_norm": 9.208758354187012, - "learning_rate": 2.2650978034736588e-05, - "loss": 0.5738, - "step": 185620 - }, - { - "epoch": 1.6410297211761171, - "grad_norm": 2.564452648162842, - "learning_rate": 2.2649504647064716e-05, - "loss": 0.6949, - "step": 185630 - }, - { - "epoch": 1.6411181244364292, - "grad_norm": 2.0815610885620117, - "learning_rate": 2.2648031259392848e-05, - "loss": 0.5191, - "step": 185640 - }, - { - "epoch": 1.6412065276967414, - "grad_norm": 3.158571243286133, - "learning_rate": 2.2646557871720976e-05, - "loss": 0.6093, - "step": 185650 - }, - { - "epoch": 1.6412949309570537, - "grad_norm": 2.1659717559814453, - "learning_rate": 2.2645084484049104e-05, - "loss": 0.5577, - "step": 185660 - }, - { - "epoch": 1.641383334217366, - "grad_norm": 6.634654521942139, - "learning_rate": 2.2643611096377236e-05, - "loss": 0.573, - "step": 185670 - }, - { - "epoch": 1.6414717374776782, - "grad_norm": 6.806859016418457, - "learning_rate": 2.2642137708705365e-05, - "loss": 0.61, - "step": 185680 - }, - { - "epoch": 1.6415601407379903, - "grad_norm": 3.863046646118164, - "learning_rate": 2.2640664321033493e-05, - "loss": 0.7126, - "step": 185690 - }, - { - "epoch": 1.6416485439983026, - "grad_norm": 5.153300762176514, - "learning_rate": 2.2639190933361625e-05, - "loss": 0.5262, - "step": 185700 - }, - { - "epoch": 1.641736947258615, - "grad_norm": 2.3899624347686768, - "learning_rate": 2.2637717545689753e-05, - "loss": 0.5768, - "step": 185710 - }, - { - "epoch": 1.641825350518927, - "grad_norm": 1.7550944089889526, - "learning_rate": 2.263624415801788e-05, - "loss": 0.6629, - "step": 185720 - }, - { - "epoch": 1.6419137537792394, - "grad_norm": 2.075427770614624, - "learning_rate": 2.2634770770346013e-05, - "loss": 0.589, - "step": 185730 - }, - { - "epoch": 1.6420021570395518, - "grad_norm": 9.012155532836914, - "learning_rate": 2.263329738267414e-05, - "loss": 0.5528, - "step": 185740 - }, - { - "epoch": 1.6420905602998639, - "grad_norm": 15.19962215423584, - "learning_rate": 2.263182399500227e-05, - "loss": 0.6877, - "step": 185750 - }, - { - "epoch": 1.642178963560176, - "grad_norm": 5.779613971710205, - "learning_rate": 2.26303506073304e-05, - "loss": 0.5426, - "step": 185760 - }, - { - "epoch": 1.6422673668204884, - "grad_norm": 2.744046688079834, - "learning_rate": 2.262887721965853e-05, - "loss": 0.4839, - "step": 185770 - }, - { - "epoch": 1.6423557700808007, - "grad_norm": 1.8332329988479614, - "learning_rate": 2.2627403831986658e-05, - "loss": 0.725, - "step": 185780 - }, - { - "epoch": 1.6424441733411128, - "grad_norm": 2.207960605621338, - "learning_rate": 2.262593044431479e-05, - "loss": 0.5962, - "step": 185790 - }, - { - "epoch": 1.642532576601425, - "grad_norm": 3.4198386669158936, - "learning_rate": 2.262445705664292e-05, - "loss": 0.6046, - "step": 185800 - }, - { - "epoch": 1.6426209798617373, - "grad_norm": 6.026602268218994, - "learning_rate": 2.2622983668971047e-05, - "loss": 0.5458, - "step": 185810 - }, - { - "epoch": 1.6427093831220496, - "grad_norm": 1.4739265441894531, - "learning_rate": 2.2621510281299175e-05, - "loss": 0.5088, - "step": 185820 - }, - { - "epoch": 1.6427977863823617, - "grad_norm": 4.951425075531006, - "learning_rate": 2.2620036893627307e-05, - "loss": 0.6929, - "step": 185830 - }, - { - "epoch": 1.6428861896426739, - "grad_norm": 7.2399797439575195, - "learning_rate": 2.2618563505955435e-05, - "loss": 0.5343, - "step": 185840 - }, - { - "epoch": 1.6429745929029864, - "grad_norm": 9.530874252319336, - "learning_rate": 2.2617090118283564e-05, - "loss": 0.5051, - "step": 185850 - }, - { - "epoch": 1.6430629961632985, - "grad_norm": 1.4886133670806885, - "learning_rate": 2.2615616730611695e-05, - "loss": 0.4965, - "step": 185860 - }, - { - "epoch": 1.6431513994236107, - "grad_norm": 10.15964126586914, - "learning_rate": 2.2614143342939824e-05, - "loss": 0.5204, - "step": 185870 - }, - { - "epoch": 1.643239802683923, - "grad_norm": 1.6887242794036865, - "learning_rate": 2.2612669955267952e-05, - "loss": 0.5784, - "step": 185880 - }, - { - "epoch": 1.6433282059442353, - "grad_norm": 0.7270352244377136, - "learning_rate": 2.261119656759608e-05, - "loss": 0.6224, - "step": 185890 - }, - { - "epoch": 1.6434166092045475, - "grad_norm": 7.137011528015137, - "learning_rate": 2.2609723179924212e-05, - "loss": 0.5258, - "step": 185900 - }, - { - "epoch": 1.6435050124648596, - "grad_norm": 2.0126235485076904, - "learning_rate": 2.260824979225234e-05, - "loss": 0.5014, - "step": 185910 - }, - { - "epoch": 1.643593415725172, - "grad_norm": 7.399680137634277, - "learning_rate": 2.260677640458047e-05, - "loss": 0.5448, - "step": 185920 - }, - { - "epoch": 1.6436818189854843, - "grad_norm": 3.8143908977508545, - "learning_rate": 2.2605303016908597e-05, - "loss": 0.6536, - "step": 185930 - }, - { - "epoch": 1.6437702222457964, - "grad_norm": 2.3221652507781982, - "learning_rate": 2.260382962923673e-05, - "loss": 0.5971, - "step": 185940 - }, - { - "epoch": 1.6438586255061085, - "grad_norm": 17.32271385192871, - "learning_rate": 2.2602356241564857e-05, - "loss": 0.5239, - "step": 185950 - }, - { - "epoch": 1.643947028766421, - "grad_norm": 3.2116289138793945, - "learning_rate": 2.2600882853892986e-05, - "loss": 0.5328, - "step": 185960 - }, - { - "epoch": 1.6440354320267332, - "grad_norm": 2.5078237056732178, - "learning_rate": 2.2599409466221117e-05, - "loss": 0.5402, - "step": 185970 - }, - { - "epoch": 1.6441238352870453, - "grad_norm": 1.8225317001342773, - "learning_rate": 2.2597936078549246e-05, - "loss": 0.5803, - "step": 185980 - }, - { - "epoch": 1.6442122385473577, - "grad_norm": 2.283177137374878, - "learning_rate": 2.2596462690877374e-05, - "loss": 0.6979, - "step": 185990 - }, - { - "epoch": 1.64430064180767, - "grad_norm": 5.670346260070801, - "learning_rate": 2.2594989303205502e-05, - "loss": 0.4854, - "step": 186000 - }, - { - "epoch": 1.6443890450679821, - "grad_norm": 2.1823768615722656, - "learning_rate": 2.2593515915533634e-05, - "loss": 0.5479, - "step": 186010 - }, - { - "epoch": 1.6444774483282942, - "grad_norm": 1.916414499282837, - "learning_rate": 2.2592042527861762e-05, - "loss": 0.5529, - "step": 186020 - }, - { - "epoch": 1.6445658515886066, - "grad_norm": 3.1143224239349365, - "learning_rate": 2.259056914018989e-05, - "loss": 0.6401, - "step": 186030 - }, - { - "epoch": 1.644654254848919, - "grad_norm": 2.6293599605560303, - "learning_rate": 2.2589095752518023e-05, - "loss": 0.6842, - "step": 186040 - }, - { - "epoch": 1.644742658109231, - "grad_norm": 3.2117276191711426, - "learning_rate": 2.258762236484615e-05, - "loss": 0.7098, - "step": 186050 - }, - { - "epoch": 1.6448310613695432, - "grad_norm": 1.3258217573165894, - "learning_rate": 2.258614897717428e-05, - "loss": 0.5693, - "step": 186060 - }, - { - "epoch": 1.6449194646298555, - "grad_norm": 2.0359179973602295, - "learning_rate": 2.2584675589502408e-05, - "loss": 0.6484, - "step": 186070 - }, - { - "epoch": 1.6450078678901678, - "grad_norm": 2.763855457305908, - "learning_rate": 2.258320220183054e-05, - "loss": 0.7361, - "step": 186080 - }, - { - "epoch": 1.64509627115048, - "grad_norm": 1.6098722219467163, - "learning_rate": 2.2581728814158668e-05, - "loss": 0.5642, - "step": 186090 - }, - { - "epoch": 1.6451846744107923, - "grad_norm": 1.175952434539795, - "learning_rate": 2.2580255426486796e-05, - "loss": 0.7005, - "step": 186100 - }, - { - "epoch": 1.6452730776711046, - "grad_norm": 4.795873165130615, - "learning_rate": 2.2578782038814924e-05, - "loss": 0.6346, - "step": 186110 - }, - { - "epoch": 1.6453614809314168, - "grad_norm": 1.2376720905303955, - "learning_rate": 2.2577308651143056e-05, - "loss": 0.5795, - "step": 186120 - }, - { - "epoch": 1.6454498841917289, - "grad_norm": 1.6838208436965942, - "learning_rate": 2.2575835263471185e-05, - "loss": 0.5601, - "step": 186130 - }, - { - "epoch": 1.6455382874520412, - "grad_norm": 3.498685359954834, - "learning_rate": 2.2574361875799313e-05, - "loss": 0.6309, - "step": 186140 - }, - { - "epoch": 1.6456266907123536, - "grad_norm": 2.9972593784332275, - "learning_rate": 2.2572888488127445e-05, - "loss": 0.6418, - "step": 186150 - }, - { - "epoch": 1.6457150939726657, - "grad_norm": 10.504207611083984, - "learning_rate": 2.2571415100455573e-05, - "loss": 0.5684, - "step": 186160 - }, - { - "epoch": 1.6458034972329778, - "grad_norm": 1.811249852180481, - "learning_rate": 2.25699417127837e-05, - "loss": 0.5437, - "step": 186170 - }, - { - "epoch": 1.6458919004932901, - "grad_norm": 4.171734809875488, - "learning_rate": 2.256846832511183e-05, - "loss": 0.6472, - "step": 186180 - }, - { - "epoch": 1.6459803037536025, - "grad_norm": 1.820402979850769, - "learning_rate": 2.256699493743996e-05, - "loss": 0.6145, - "step": 186190 - }, - { - "epoch": 1.6460687070139146, - "grad_norm": 1.4646812677383423, - "learning_rate": 2.256552154976809e-05, - "loss": 0.6537, - "step": 186200 - }, - { - "epoch": 1.646157110274227, - "grad_norm": 1.6161844730377197, - "learning_rate": 2.2564048162096218e-05, - "loss": 0.5979, - "step": 186210 - }, - { - "epoch": 1.6462455135345393, - "grad_norm": 2.2486977577209473, - "learning_rate": 2.256257477442435e-05, - "loss": 0.6974, - "step": 186220 - }, - { - "epoch": 1.6463339167948514, - "grad_norm": 2.3361196517944336, - "learning_rate": 2.2561101386752478e-05, - "loss": 0.5686, - "step": 186230 - }, - { - "epoch": 1.6464223200551635, - "grad_norm": 2.4169716835021973, - "learning_rate": 2.2559627999080607e-05, - "loss": 0.6224, - "step": 186240 - }, - { - "epoch": 1.6465107233154759, - "grad_norm": 5.070600509643555, - "learning_rate": 2.2558154611408735e-05, - "loss": 0.5573, - "step": 186250 - }, - { - "epoch": 1.6465991265757882, - "grad_norm": 13.2886381149292, - "learning_rate": 2.2556681223736867e-05, - "loss": 0.7492, - "step": 186260 - }, - { - "epoch": 1.6466875298361003, - "grad_norm": 9.86184310913086, - "learning_rate": 2.2555207836064995e-05, - "loss": 0.5797, - "step": 186270 - }, - { - "epoch": 1.6467759330964125, - "grad_norm": 1.712245225906372, - "learning_rate": 2.2553734448393123e-05, - "loss": 0.5541, - "step": 186280 - }, - { - "epoch": 1.6468643363567248, - "grad_norm": 1.4967656135559082, - "learning_rate": 2.2552261060721252e-05, - "loss": 0.5551, - "step": 186290 - }, - { - "epoch": 1.6469527396170371, - "grad_norm": 17.65376853942871, - "learning_rate": 2.2550787673049384e-05, - "loss": 0.6918, - "step": 186300 - }, - { - "epoch": 1.6470411428773493, - "grad_norm": 14.704512596130371, - "learning_rate": 2.2549314285377512e-05, - "loss": 0.546, - "step": 186310 - }, - { - "epoch": 1.6471295461376616, - "grad_norm": 4.653510570526123, - "learning_rate": 2.254784089770564e-05, - "loss": 0.659, - "step": 186320 - }, - { - "epoch": 1.647217949397974, - "grad_norm": 2.2873945236206055, - "learning_rate": 2.2546367510033772e-05, - "loss": 0.5374, - "step": 186330 - }, - { - "epoch": 1.647306352658286, - "grad_norm": 15.1098051071167, - "learning_rate": 2.25448941223619e-05, - "loss": 0.59, - "step": 186340 - }, - { - "epoch": 1.6473947559185982, - "grad_norm": 1.3340559005737305, - "learning_rate": 2.254342073469003e-05, - "loss": 0.5251, - "step": 186350 - }, - { - "epoch": 1.6474831591789105, - "grad_norm": 3.3727757930755615, - "learning_rate": 2.2541947347018157e-05, - "loss": 0.7079, - "step": 186360 - }, - { - "epoch": 1.6475715624392229, - "grad_norm": 1.6442217826843262, - "learning_rate": 2.254047395934629e-05, - "loss": 0.531, - "step": 186370 - }, - { - "epoch": 1.647659965699535, - "grad_norm": 5.728442668914795, - "learning_rate": 2.2539000571674417e-05, - "loss": 0.5283, - "step": 186380 - }, - { - "epoch": 1.647748368959847, - "grad_norm": 8.326568603515625, - "learning_rate": 2.2537527184002545e-05, - "loss": 0.6114, - "step": 186390 - }, - { - "epoch": 1.6478367722201595, - "grad_norm": 1.2240368127822876, - "learning_rate": 2.2536053796330674e-05, - "loss": 0.626, - "step": 186400 - }, - { - "epoch": 1.6479251754804718, - "grad_norm": 9.812299728393555, - "learning_rate": 2.2534580408658806e-05, - "loss": 0.6095, - "step": 186410 - }, - { - "epoch": 1.648013578740784, - "grad_norm": 4.076673984527588, - "learning_rate": 2.2533107020986934e-05, - "loss": 0.6195, - "step": 186420 - }, - { - "epoch": 1.6481019820010963, - "grad_norm": 1.2228164672851562, - "learning_rate": 2.2531633633315062e-05, - "loss": 0.4627, - "step": 186430 - }, - { - "epoch": 1.6481903852614086, - "grad_norm": 5.404401779174805, - "learning_rate": 2.2530160245643194e-05, - "loss": 0.4749, - "step": 186440 - }, - { - "epoch": 1.6482787885217207, - "grad_norm": 2.2291276454925537, - "learning_rate": 2.2528686857971322e-05, - "loss": 0.6116, - "step": 186450 - }, - { - "epoch": 1.6483671917820328, - "grad_norm": 1.830512523651123, - "learning_rate": 2.252721347029945e-05, - "loss": 0.57, - "step": 186460 - }, - { - "epoch": 1.6484555950423452, - "grad_norm": 2.566335439682007, - "learning_rate": 2.252574008262758e-05, - "loss": 0.4799, - "step": 186470 - }, - { - "epoch": 1.6485439983026575, - "grad_norm": 17.35238265991211, - "learning_rate": 2.252426669495571e-05, - "loss": 0.5629, - "step": 186480 - }, - { - "epoch": 1.6486324015629696, - "grad_norm": 4.275138854980469, - "learning_rate": 2.252279330728384e-05, - "loss": 0.6852, - "step": 186490 - }, - { - "epoch": 1.6487208048232818, - "grad_norm": 2.7943878173828125, - "learning_rate": 2.2521319919611968e-05, - "loss": 0.6412, - "step": 186500 - }, - { - "epoch": 1.648809208083594, - "grad_norm": 7.2049384117126465, - "learning_rate": 2.25198465319401e-05, - "loss": 0.6042, - "step": 186510 - }, - { - "epoch": 1.6488976113439064, - "grad_norm": 2.6606061458587646, - "learning_rate": 2.2518373144268228e-05, - "loss": 0.6231, - "step": 186520 - }, - { - "epoch": 1.6489860146042186, - "grad_norm": 2.138690948486328, - "learning_rate": 2.2516899756596356e-05, - "loss": 0.676, - "step": 186530 - }, - { - "epoch": 1.6490744178645307, - "grad_norm": 2.766820192337036, - "learning_rate": 2.2515426368924484e-05, - "loss": 0.5783, - "step": 186540 - }, - { - "epoch": 1.6491628211248432, - "grad_norm": 8.024649620056152, - "learning_rate": 2.2513952981252616e-05, - "loss": 0.8184, - "step": 186550 - }, - { - "epoch": 1.6492512243851554, - "grad_norm": 5.261190891265869, - "learning_rate": 2.2512479593580744e-05, - "loss": 0.6714, - "step": 186560 - }, - { - "epoch": 1.6493396276454675, - "grad_norm": 1.8569610118865967, - "learning_rate": 2.2511006205908873e-05, - "loss": 0.5392, - "step": 186570 - }, - { - "epoch": 1.6494280309057798, - "grad_norm": 2.904841184616089, - "learning_rate": 2.2509532818237005e-05, - "loss": 0.7231, - "step": 186580 - }, - { - "epoch": 1.6495164341660922, - "grad_norm": 3.9287497997283936, - "learning_rate": 2.2508059430565133e-05, - "loss": 0.5742, - "step": 186590 - }, - { - "epoch": 1.6496048374264043, - "grad_norm": 2.037377119064331, - "learning_rate": 2.250658604289326e-05, - "loss": 0.6421, - "step": 186600 - }, - { - "epoch": 1.6496932406867164, - "grad_norm": 1.6801235675811768, - "learning_rate": 2.2505112655221393e-05, - "loss": 0.5124, - "step": 186610 - }, - { - "epoch": 1.6497816439470288, - "grad_norm": 6.721368312835693, - "learning_rate": 2.250363926754952e-05, - "loss": 0.6706, - "step": 186620 - }, - { - "epoch": 1.649870047207341, - "grad_norm": 9.116744041442871, - "learning_rate": 2.250216587987765e-05, - "loss": 0.7733, - "step": 186630 - }, - { - "epoch": 1.6499584504676532, - "grad_norm": 0.9526000022888184, - "learning_rate": 2.250069249220578e-05, - "loss": 0.5411, - "step": 186640 - }, - { - "epoch": 1.6500468537279653, - "grad_norm": 4.075675010681152, - "learning_rate": 2.249921910453391e-05, - "loss": 0.5596, - "step": 186650 - }, - { - "epoch": 1.6501352569882777, - "grad_norm": 2.9119768142700195, - "learning_rate": 2.2497745716862038e-05, - "loss": 0.5673, - "step": 186660 - }, - { - "epoch": 1.65022366024859, - "grad_norm": 4.40766716003418, - "learning_rate": 2.249627232919017e-05, - "loss": 0.6125, - "step": 186670 - }, - { - "epoch": 1.6503120635089021, - "grad_norm": 8.399806022644043, - "learning_rate": 2.2494798941518298e-05, - "loss": 0.6156, - "step": 186680 - }, - { - "epoch": 1.6504004667692145, - "grad_norm": 1.9836020469665527, - "learning_rate": 2.2493325553846427e-05, - "loss": 0.6544, - "step": 186690 - }, - { - "epoch": 1.6504888700295268, - "grad_norm": 3.8788788318634033, - "learning_rate": 2.249185216617456e-05, - "loss": 0.5849, - "step": 186700 - }, - { - "epoch": 1.650577273289839, - "grad_norm": 2.894810199737549, - "learning_rate": 2.2490378778502687e-05, - "loss": 0.5491, - "step": 186710 - }, - { - "epoch": 1.650665676550151, - "grad_norm": 4.892343997955322, - "learning_rate": 2.2488905390830815e-05, - "loss": 0.6121, - "step": 186720 - }, - { - "epoch": 1.6507540798104634, - "grad_norm": 1.6945147514343262, - "learning_rate": 2.2487432003158947e-05, - "loss": 0.5917, - "step": 186730 - }, - { - "epoch": 1.6508424830707757, - "grad_norm": 4.941824436187744, - "learning_rate": 2.2485958615487075e-05, - "loss": 0.6246, - "step": 186740 - }, - { - "epoch": 1.6509308863310879, - "grad_norm": 3.3184974193573, - "learning_rate": 2.2484485227815203e-05, - "loss": 0.5743, - "step": 186750 - }, - { - "epoch": 1.6510192895914, - "grad_norm": 5.543766975402832, - "learning_rate": 2.2483011840143332e-05, - "loss": 0.6602, - "step": 186760 - }, - { - "epoch": 1.6511076928517123, - "grad_norm": 2.293617010116577, - "learning_rate": 2.2481538452471464e-05, - "loss": 0.5078, - "step": 186770 - }, - { - "epoch": 1.6511960961120247, - "grad_norm": 1.9293001890182495, - "learning_rate": 2.2480065064799592e-05, - "loss": 0.6742, - "step": 186780 - }, - { - "epoch": 1.6512844993723368, - "grad_norm": 4.787430286407471, - "learning_rate": 2.247859167712772e-05, - "loss": 0.4449, - "step": 186790 - }, - { - "epoch": 1.6513729026326491, - "grad_norm": 4.850201606750488, - "learning_rate": 2.2477118289455852e-05, - "loss": 0.6375, - "step": 186800 - }, - { - "epoch": 1.6514613058929615, - "grad_norm": 0.9525098204612732, - "learning_rate": 2.247564490178398e-05, - "loss": 0.6298, - "step": 186810 - }, - { - "epoch": 1.6515497091532736, - "grad_norm": 11.680496215820312, - "learning_rate": 2.247417151411211e-05, - "loss": 0.59, - "step": 186820 - }, - { - "epoch": 1.6516381124135857, - "grad_norm": 3.2707183361053467, - "learning_rate": 2.2472698126440237e-05, - "loss": 0.6066, - "step": 186830 - }, - { - "epoch": 1.651726515673898, - "grad_norm": 1.8392772674560547, - "learning_rate": 2.247122473876837e-05, - "loss": 0.7862, - "step": 186840 - }, - { - "epoch": 1.6518149189342104, - "grad_norm": 1.7167593240737915, - "learning_rate": 2.2469751351096497e-05, - "loss": 0.6628, - "step": 186850 - }, - { - "epoch": 1.6519033221945225, - "grad_norm": 3.199334144592285, - "learning_rate": 2.2468277963424626e-05, - "loss": 0.6035, - "step": 186860 - }, - { - "epoch": 1.6519917254548346, - "grad_norm": 1.1371521949768066, - "learning_rate": 2.2466804575752757e-05, - "loss": 0.5503, - "step": 186870 - }, - { - "epoch": 1.652080128715147, - "grad_norm": 4.007225513458252, - "learning_rate": 2.2465331188080886e-05, - "loss": 0.4445, - "step": 186880 - }, - { - "epoch": 1.6521685319754593, - "grad_norm": 2.58516263961792, - "learning_rate": 2.2463857800409014e-05, - "loss": 0.5687, - "step": 186890 - }, - { - "epoch": 1.6522569352357714, - "grad_norm": 3.320324420928955, - "learning_rate": 2.2462384412737142e-05, - "loss": 0.5398, - "step": 186900 - }, - { - "epoch": 1.6523453384960838, - "grad_norm": 1.634325385093689, - "learning_rate": 2.2460911025065274e-05, - "loss": 0.6049, - "step": 186910 - }, - { - "epoch": 1.6524337417563961, - "grad_norm": 4.976755142211914, - "learning_rate": 2.2459437637393402e-05, - "loss": 0.4744, - "step": 186920 - }, - { - "epoch": 1.6525221450167082, - "grad_norm": 12.777931213378906, - "learning_rate": 2.245796424972153e-05, - "loss": 0.5794, - "step": 186930 - }, - { - "epoch": 1.6526105482770204, - "grad_norm": 2.296560049057007, - "learning_rate": 2.245649086204966e-05, - "loss": 0.6496, - "step": 186940 - }, - { - "epoch": 1.6526989515373327, - "grad_norm": 3.4259567260742188, - "learning_rate": 2.245501747437779e-05, - "loss": 0.5835, - "step": 186950 - }, - { - "epoch": 1.652787354797645, - "grad_norm": 2.2072949409484863, - "learning_rate": 2.245354408670592e-05, - "loss": 0.5796, - "step": 186960 - }, - { - "epoch": 1.6528757580579572, - "grad_norm": 2.7551636695861816, - "learning_rate": 2.2452070699034048e-05, - "loss": 0.6548, - "step": 186970 - }, - { - "epoch": 1.6529641613182693, - "grad_norm": 0.8537187576293945, - "learning_rate": 2.245059731136218e-05, - "loss": 0.6141, - "step": 186980 - }, - { - "epoch": 1.6530525645785816, - "grad_norm": 4.9957756996154785, - "learning_rate": 2.2449123923690308e-05, - "loss": 0.5832, - "step": 186990 - }, - { - "epoch": 1.653140967838894, - "grad_norm": 3.157557964324951, - "learning_rate": 2.2447650536018436e-05, - "loss": 0.6262, - "step": 187000 - }, - { - "epoch": 1.653229371099206, - "grad_norm": 4.5432868003845215, - "learning_rate": 2.2446177148346564e-05, - "loss": 0.6355, - "step": 187010 - }, - { - "epoch": 1.6533177743595184, - "grad_norm": 9.247173309326172, - "learning_rate": 2.2444703760674696e-05, - "loss": 0.6208, - "step": 187020 - }, - { - "epoch": 1.6534061776198308, - "grad_norm": 0.7592854499816895, - "learning_rate": 2.2443230373002825e-05, - "loss": 0.6037, - "step": 187030 - }, - { - "epoch": 1.653494580880143, - "grad_norm": 2.9964187145233154, - "learning_rate": 2.2441756985330953e-05, - "loss": 0.5343, - "step": 187040 - }, - { - "epoch": 1.653582984140455, - "grad_norm": 3.456821918487549, - "learning_rate": 2.244028359765908e-05, - "loss": 0.6327, - "step": 187050 - }, - { - "epoch": 1.6536713874007674, - "grad_norm": 1.7094639539718628, - "learning_rate": 2.2438810209987213e-05, - "loss": 0.6772, - "step": 187060 - }, - { - "epoch": 1.6537597906610797, - "grad_norm": 0.9522131085395813, - "learning_rate": 2.243733682231534e-05, - "loss": 0.6629, - "step": 187070 - }, - { - "epoch": 1.6538481939213918, - "grad_norm": 3.7966177463531494, - "learning_rate": 2.243586343464347e-05, - "loss": 0.5795, - "step": 187080 - }, - { - "epoch": 1.653936597181704, - "grad_norm": 1.757983922958374, - "learning_rate": 2.24343900469716e-05, - "loss": 0.5867, - "step": 187090 - }, - { - "epoch": 1.6540250004420163, - "grad_norm": 1.2853597402572632, - "learning_rate": 2.243291665929973e-05, - "loss": 0.5369, - "step": 187100 - }, - { - "epoch": 1.6541134037023286, - "grad_norm": 3.481792688369751, - "learning_rate": 2.2431443271627858e-05, - "loss": 0.5699, - "step": 187110 - }, - { - "epoch": 1.6542018069626407, - "grad_norm": 3.536203384399414, - "learning_rate": 2.2429969883955986e-05, - "loss": 0.5452, - "step": 187120 - }, - { - "epoch": 1.6542902102229529, - "grad_norm": 2.491907835006714, - "learning_rate": 2.2428496496284118e-05, - "loss": 0.6593, - "step": 187130 - }, - { - "epoch": 1.6543786134832654, - "grad_norm": 2.9078681468963623, - "learning_rate": 2.2427023108612247e-05, - "loss": 0.6332, - "step": 187140 - }, - { - "epoch": 1.6544670167435775, - "grad_norm": 2.54404616355896, - "learning_rate": 2.2425549720940375e-05, - "loss": 0.643, - "step": 187150 - }, - { - "epoch": 1.6545554200038897, - "grad_norm": 4.271890163421631, - "learning_rate": 2.2424076333268507e-05, - "loss": 0.5725, - "step": 187160 - }, - { - "epoch": 1.654643823264202, - "grad_norm": 1.9113109111785889, - "learning_rate": 2.2422602945596635e-05, - "loss": 0.5966, - "step": 187170 - }, - { - "epoch": 1.6547322265245143, - "grad_norm": 7.789828300476074, - "learning_rate": 2.2421129557924763e-05, - "loss": 0.5529, - "step": 187180 - }, - { - "epoch": 1.6548206297848265, - "grad_norm": 13.340986251831055, - "learning_rate": 2.2419656170252892e-05, - "loss": 0.4414, - "step": 187190 - }, - { - "epoch": 1.6549090330451386, - "grad_norm": 1.3944019079208374, - "learning_rate": 2.2418182782581023e-05, - "loss": 0.6176, - "step": 187200 - }, - { - "epoch": 1.654997436305451, - "grad_norm": 1.931065559387207, - "learning_rate": 2.2416709394909152e-05, - "loss": 0.5971, - "step": 187210 - }, - { - "epoch": 1.6550858395657633, - "grad_norm": 4.512258052825928, - "learning_rate": 2.241523600723728e-05, - "loss": 0.55, - "step": 187220 - }, - { - "epoch": 1.6551742428260754, - "grad_norm": 3.3120241165161133, - "learning_rate": 2.241376261956541e-05, - "loss": 0.7053, - "step": 187230 - }, - { - "epoch": 1.6552626460863875, - "grad_norm": 2.242070436477661, - "learning_rate": 2.241228923189354e-05, - "loss": 0.636, - "step": 187240 - }, - { - "epoch": 1.6553510493466999, - "grad_norm": 1.2260303497314453, - "learning_rate": 2.241081584422167e-05, - "loss": 0.6742, - "step": 187250 - }, - { - "epoch": 1.6554394526070122, - "grad_norm": 3.577976942062378, - "learning_rate": 2.2409342456549797e-05, - "loss": 0.525, - "step": 187260 - }, - { - "epoch": 1.6555278558673243, - "grad_norm": 2.638819456100464, - "learning_rate": 2.240786906887793e-05, - "loss": 0.5726, - "step": 187270 - }, - { - "epoch": 1.6556162591276367, - "grad_norm": 1.5112980604171753, - "learning_rate": 2.2406395681206057e-05, - "loss": 0.5624, - "step": 187280 - }, - { - "epoch": 1.655704662387949, - "grad_norm": 3.5007877349853516, - "learning_rate": 2.2404922293534185e-05, - "loss": 0.5716, - "step": 187290 - }, - { - "epoch": 1.6557930656482611, - "grad_norm": 3.1316921710968018, - "learning_rate": 2.2403448905862314e-05, - "loss": 0.6398, - "step": 187300 - }, - { - "epoch": 1.6558814689085732, - "grad_norm": 6.054881572723389, - "learning_rate": 2.2401975518190446e-05, - "loss": 0.6693, - "step": 187310 - }, - { - "epoch": 1.6559698721688856, - "grad_norm": 10.04617977142334, - "learning_rate": 2.2400502130518574e-05, - "loss": 0.6034, - "step": 187320 - }, - { - "epoch": 1.656058275429198, - "grad_norm": 1.2988650798797607, - "learning_rate": 2.2399028742846702e-05, - "loss": 0.6341, - "step": 187330 - }, - { - "epoch": 1.65614667868951, - "grad_norm": 5.454225063323975, - "learning_rate": 2.2397555355174834e-05, - "loss": 0.5, - "step": 187340 - }, - { - "epoch": 1.6562350819498222, - "grad_norm": 3.382472515106201, - "learning_rate": 2.2396081967502962e-05, - "loss": 0.6223, - "step": 187350 - }, - { - "epoch": 1.6563234852101345, - "grad_norm": 12.317181587219238, - "learning_rate": 2.239460857983109e-05, - "loss": 0.5549, - "step": 187360 - }, - { - "epoch": 1.6564118884704468, - "grad_norm": 8.568225860595703, - "learning_rate": 2.239313519215922e-05, - "loss": 0.6905, - "step": 187370 - }, - { - "epoch": 1.656500291730759, - "grad_norm": 4.240734577178955, - "learning_rate": 2.239166180448735e-05, - "loss": 0.5609, - "step": 187380 - }, - { - "epoch": 1.6565886949910713, - "grad_norm": 4.741962432861328, - "learning_rate": 2.239018841681548e-05, - "loss": 0.6033, - "step": 187390 - }, - { - "epoch": 1.6566770982513837, - "grad_norm": 2.4022674560546875, - "learning_rate": 2.2388715029143607e-05, - "loss": 0.4941, - "step": 187400 - }, - { - "epoch": 1.6567655015116958, - "grad_norm": 1.1704354286193848, - "learning_rate": 2.2387241641471736e-05, - "loss": 0.6629, - "step": 187410 - }, - { - "epoch": 1.656853904772008, - "grad_norm": 3.423189163208008, - "learning_rate": 2.2385768253799868e-05, - "loss": 0.5746, - "step": 187420 - }, - { - "epoch": 1.6569423080323202, - "grad_norm": 5.525378227233887, - "learning_rate": 2.2384294866127996e-05, - "loss": 0.6669, - "step": 187430 - }, - { - "epoch": 1.6570307112926326, - "grad_norm": 4.234412670135498, - "learning_rate": 2.2382821478456124e-05, - "loss": 0.5415, - "step": 187440 - }, - { - "epoch": 1.6571191145529447, - "grad_norm": 4.066867351531982, - "learning_rate": 2.2381348090784256e-05, - "loss": 0.5628, - "step": 187450 - }, - { - "epoch": 1.6572075178132568, - "grad_norm": 1.7800112962722778, - "learning_rate": 2.2379874703112384e-05, - "loss": 0.5709, - "step": 187460 - }, - { - "epoch": 1.6572959210735692, - "grad_norm": 2.0689873695373535, - "learning_rate": 2.2378401315440513e-05, - "loss": 0.4691, - "step": 187470 - }, - { - "epoch": 1.6573843243338815, - "grad_norm": 1.8084672689437866, - "learning_rate": 2.237692792776864e-05, - "loss": 0.5711, - "step": 187480 - }, - { - "epoch": 1.6574727275941936, - "grad_norm": 1.3834961652755737, - "learning_rate": 2.2375454540096773e-05, - "loss": 0.4465, - "step": 187490 - }, - { - "epoch": 1.657561130854506, - "grad_norm": 1.350616693496704, - "learning_rate": 2.23739811524249e-05, - "loss": 0.562, - "step": 187500 - }, - { - "epoch": 1.6576495341148183, - "grad_norm": 16.401464462280273, - "learning_rate": 2.237250776475303e-05, - "loss": 0.6055, - "step": 187510 - }, - { - "epoch": 1.6577379373751304, - "grad_norm": 1.357333779335022, - "learning_rate": 2.237103437708116e-05, - "loss": 0.4337, - "step": 187520 - }, - { - "epoch": 1.6578263406354425, - "grad_norm": 5.806070327758789, - "learning_rate": 2.236956098940929e-05, - "loss": 0.4938, - "step": 187530 - }, - { - "epoch": 1.6579147438957549, - "grad_norm": 3.723224639892578, - "learning_rate": 2.236808760173742e-05, - "loss": 0.6482, - "step": 187540 - }, - { - "epoch": 1.6580031471560672, - "grad_norm": 2.085385799407959, - "learning_rate": 2.236661421406555e-05, - "loss": 0.6182, - "step": 187550 - }, - { - "epoch": 1.6580915504163793, - "grad_norm": 2.7253615856170654, - "learning_rate": 2.2365140826393678e-05, - "loss": 0.7215, - "step": 187560 - }, - { - "epoch": 1.6581799536766915, - "grad_norm": 5.712151050567627, - "learning_rate": 2.236366743872181e-05, - "loss": 0.5406, - "step": 187570 - }, - { - "epoch": 1.6582683569370038, - "grad_norm": 3.2005090713500977, - "learning_rate": 2.2362194051049938e-05, - "loss": 0.5711, - "step": 187580 - }, - { - "epoch": 1.6583567601973161, - "grad_norm": 1.4085785150527954, - "learning_rate": 2.2360720663378067e-05, - "loss": 0.6013, - "step": 187590 - }, - { - "epoch": 1.6584451634576283, - "grad_norm": 0.8115139007568359, - "learning_rate": 2.2359247275706198e-05, - "loss": 0.495, - "step": 187600 - }, - { - "epoch": 1.6585335667179406, - "grad_norm": 2.6341500282287598, - "learning_rate": 2.2357773888034327e-05, - "loss": 0.6219, - "step": 187610 - }, - { - "epoch": 1.658621969978253, - "grad_norm": 2.4017670154571533, - "learning_rate": 2.2356300500362455e-05, - "loss": 0.6649, - "step": 187620 - }, - { - "epoch": 1.658710373238565, - "grad_norm": 11.742932319641113, - "learning_rate": 2.2354827112690587e-05, - "loss": 0.5668, - "step": 187630 - }, - { - "epoch": 1.6587987764988772, - "grad_norm": 1.3605257272720337, - "learning_rate": 2.2353353725018715e-05, - "loss": 0.5387, - "step": 187640 - }, - { - "epoch": 1.6588871797591895, - "grad_norm": 1.6390085220336914, - "learning_rate": 2.2351880337346843e-05, - "loss": 0.494, - "step": 187650 - }, - { - "epoch": 1.6589755830195019, - "grad_norm": 2.0409624576568604, - "learning_rate": 2.2350406949674972e-05, - "loss": 0.5474, - "step": 187660 - }, - { - "epoch": 1.659063986279814, - "grad_norm": 1.4725699424743652, - "learning_rate": 2.2348933562003104e-05, - "loss": 0.5128, - "step": 187670 - }, - { - "epoch": 1.6591523895401261, - "grad_norm": 1.872623324394226, - "learning_rate": 2.2347460174331232e-05, - "loss": 0.585, - "step": 187680 - }, - { - "epoch": 1.6592407928004385, - "grad_norm": 1.105606198310852, - "learning_rate": 2.234598678665936e-05, - "loss": 0.6576, - "step": 187690 - }, - { - "epoch": 1.6593291960607508, - "grad_norm": 3.464085102081299, - "learning_rate": 2.234451339898749e-05, - "loss": 0.7388, - "step": 187700 - }, - { - "epoch": 1.659417599321063, - "grad_norm": 1.8075155019760132, - "learning_rate": 2.234304001131562e-05, - "loss": 0.4952, - "step": 187710 - }, - { - "epoch": 1.659506002581375, - "grad_norm": 2.524611711502075, - "learning_rate": 2.234156662364375e-05, - "loss": 0.4873, - "step": 187720 - }, - { - "epoch": 1.6595944058416876, - "grad_norm": 1.3582056760787964, - "learning_rate": 2.2340093235971877e-05, - "loss": 0.5825, - "step": 187730 - }, - { - "epoch": 1.6596828091019997, - "grad_norm": 2.472538471221924, - "learning_rate": 2.233861984830001e-05, - "loss": 0.7072, - "step": 187740 - }, - { - "epoch": 1.6597712123623118, - "grad_norm": 4.318940162658691, - "learning_rate": 2.2337146460628137e-05, - "loss": 0.4695, - "step": 187750 - }, - { - "epoch": 1.6598596156226242, - "grad_norm": 1.5569344758987427, - "learning_rate": 2.2335673072956265e-05, - "loss": 0.5672, - "step": 187760 - }, - { - "epoch": 1.6599480188829365, - "grad_norm": 1.4827817678451538, - "learning_rate": 2.2334199685284394e-05, - "loss": 0.4942, - "step": 187770 - }, - { - "epoch": 1.6600364221432486, - "grad_norm": 2.3764090538024902, - "learning_rate": 2.2332726297612526e-05, - "loss": 0.595, - "step": 187780 - }, - { - "epoch": 1.6601248254035608, - "grad_norm": 1.2101463079452515, - "learning_rate": 2.2331252909940654e-05, - "loss": 0.5768, - "step": 187790 - }, - { - "epoch": 1.660213228663873, - "grad_norm": 0.9474406242370605, - "learning_rate": 2.2329779522268782e-05, - "loss": 0.7007, - "step": 187800 - }, - { - "epoch": 1.6603016319241855, - "grad_norm": 2.228926181793213, - "learning_rate": 2.2328306134596914e-05, - "loss": 0.5875, - "step": 187810 - }, - { - "epoch": 1.6603900351844976, - "grad_norm": 2.193197250366211, - "learning_rate": 2.2326832746925042e-05, - "loss": 0.5612, - "step": 187820 - }, - { - "epoch": 1.6604784384448097, - "grad_norm": 1.5102007389068604, - "learning_rate": 2.232535935925317e-05, - "loss": 0.6149, - "step": 187830 - }, - { - "epoch": 1.660566841705122, - "grad_norm": 2.672593832015991, - "learning_rate": 2.23238859715813e-05, - "loss": 0.6846, - "step": 187840 - }, - { - "epoch": 1.6606552449654344, - "grad_norm": 5.263713359832764, - "learning_rate": 2.232241258390943e-05, - "loss": 0.7165, - "step": 187850 - }, - { - "epoch": 1.6607436482257465, - "grad_norm": 2.5864052772521973, - "learning_rate": 2.232093919623756e-05, - "loss": 0.5533, - "step": 187860 - }, - { - "epoch": 1.6608320514860588, - "grad_norm": 1.2693744897842407, - "learning_rate": 2.2319465808565688e-05, - "loss": 0.6, - "step": 187870 - }, - { - "epoch": 1.6609204547463712, - "grad_norm": 2.708627462387085, - "learning_rate": 2.2317992420893816e-05, - "loss": 0.5311, - "step": 187880 - }, - { - "epoch": 1.6610088580066833, - "grad_norm": 3.467881679534912, - "learning_rate": 2.2316519033221948e-05, - "loss": 0.66, - "step": 187890 - }, - { - "epoch": 1.6610972612669954, - "grad_norm": 14.021622657775879, - "learning_rate": 2.2315045645550076e-05, - "loss": 0.6753, - "step": 187900 - }, - { - "epoch": 1.6611856645273078, - "grad_norm": 2.3567147254943848, - "learning_rate": 2.2313572257878204e-05, - "loss": 0.4414, - "step": 187910 - }, - { - "epoch": 1.66127406778762, - "grad_norm": 1.0080100297927856, - "learning_rate": 2.2312098870206336e-05, - "loss": 0.4932, - "step": 187920 - }, - { - "epoch": 1.6613624710479322, - "grad_norm": 3.2714929580688477, - "learning_rate": 2.2310625482534464e-05, - "loss": 0.6729, - "step": 187930 - }, - { - "epoch": 1.6614508743082443, - "grad_norm": 1.458680272102356, - "learning_rate": 2.2309152094862593e-05, - "loss": 0.7108, - "step": 187940 - }, - { - "epoch": 1.6615392775685567, - "grad_norm": 1.31399667263031, - "learning_rate": 2.230767870719072e-05, - "loss": 0.6631, - "step": 187950 - }, - { - "epoch": 1.661627680828869, - "grad_norm": 2.3060646057128906, - "learning_rate": 2.2306205319518853e-05, - "loss": 0.6086, - "step": 187960 - }, - { - "epoch": 1.6617160840891811, - "grad_norm": 4.0019001960754395, - "learning_rate": 2.230473193184698e-05, - "loss": 0.5381, - "step": 187970 - }, - { - "epoch": 1.6618044873494935, - "grad_norm": 6.709582805633545, - "learning_rate": 2.230325854417511e-05, - "loss": 0.6134, - "step": 187980 - }, - { - "epoch": 1.6618928906098058, - "grad_norm": 1.386630654335022, - "learning_rate": 2.230178515650324e-05, - "loss": 0.5532, - "step": 187990 - }, - { - "epoch": 1.661981293870118, - "grad_norm": 4.375740051269531, - "learning_rate": 2.230031176883137e-05, - "loss": 0.6025, - "step": 188000 - }, - { - "epoch": 1.66206969713043, - "grad_norm": 3.1843533515930176, - "learning_rate": 2.2298838381159498e-05, - "loss": 0.5813, - "step": 188010 - }, - { - "epoch": 1.6621581003907424, - "grad_norm": 1.7038811445236206, - "learning_rate": 2.2297364993487626e-05, - "loss": 0.6368, - "step": 188020 - }, - { - "epoch": 1.6622465036510548, - "grad_norm": 5.219362735748291, - "learning_rate": 2.2295891605815758e-05, - "loss": 0.6255, - "step": 188030 - }, - { - "epoch": 1.6623349069113669, - "grad_norm": 2.0721471309661865, - "learning_rate": 2.2294418218143887e-05, - "loss": 0.5182, - "step": 188040 - }, - { - "epoch": 1.662423310171679, - "grad_norm": 1.7778161764144897, - "learning_rate": 2.2292944830472015e-05, - "loss": 0.6499, - "step": 188050 - }, - { - "epoch": 1.6625117134319913, - "grad_norm": 1.397182822227478, - "learning_rate": 2.2291471442800143e-05, - "loss": 0.5333, - "step": 188060 - }, - { - "epoch": 1.6626001166923037, - "grad_norm": 3.3567376136779785, - "learning_rate": 2.2289998055128275e-05, - "loss": 0.6544, - "step": 188070 - }, - { - "epoch": 1.6626885199526158, - "grad_norm": 3.1433956623077393, - "learning_rate": 2.2288524667456403e-05, - "loss": 0.6111, - "step": 188080 - }, - { - "epoch": 1.6627769232129281, - "grad_norm": 13.049280166625977, - "learning_rate": 2.228705127978453e-05, - "loss": 0.6801, - "step": 188090 - }, - { - "epoch": 1.6628653264732405, - "grad_norm": 1.7103008031845093, - "learning_rate": 2.2285577892112663e-05, - "loss": 0.5051, - "step": 188100 - }, - { - "epoch": 1.6629537297335526, - "grad_norm": 3.4923737049102783, - "learning_rate": 2.2284104504440792e-05, - "loss": 0.6392, - "step": 188110 - }, - { - "epoch": 1.6630421329938647, - "grad_norm": 1.9245039224624634, - "learning_rate": 2.228263111676892e-05, - "loss": 0.5564, - "step": 188120 - }, - { - "epoch": 1.663130536254177, - "grad_norm": 1.9640815258026123, - "learning_rate": 2.228115772909705e-05, - "loss": 0.6027, - "step": 188130 - }, - { - "epoch": 1.6632189395144894, - "grad_norm": 8.780740737915039, - "learning_rate": 2.227968434142518e-05, - "loss": 0.6337, - "step": 188140 - }, - { - "epoch": 1.6633073427748015, - "grad_norm": 2.6531083583831787, - "learning_rate": 2.227821095375331e-05, - "loss": 0.4749, - "step": 188150 - }, - { - "epoch": 1.6633957460351136, - "grad_norm": 1.0486245155334473, - "learning_rate": 2.2276737566081437e-05, - "loss": 0.4814, - "step": 188160 - }, - { - "epoch": 1.663484149295426, - "grad_norm": 5.010915756225586, - "learning_rate": 2.2275264178409565e-05, - "loss": 0.6252, - "step": 188170 - }, - { - "epoch": 1.6635725525557383, - "grad_norm": 12.290897369384766, - "learning_rate": 2.2273790790737697e-05, - "loss": 0.6341, - "step": 188180 - }, - { - "epoch": 1.6636609558160504, - "grad_norm": 8.985589981079102, - "learning_rate": 2.2272317403065825e-05, - "loss": 0.6347, - "step": 188190 - }, - { - "epoch": 1.6637493590763628, - "grad_norm": 11.642650604248047, - "learning_rate": 2.2270844015393954e-05, - "loss": 0.4847, - "step": 188200 - }, - { - "epoch": 1.6638377623366751, - "grad_norm": 5.4836320877075195, - "learning_rate": 2.2269370627722085e-05, - "loss": 0.628, - "step": 188210 - }, - { - "epoch": 1.6639261655969872, - "grad_norm": 9.160797119140625, - "learning_rate": 2.2267897240050214e-05, - "loss": 0.5779, - "step": 188220 - }, - { - "epoch": 1.6640145688572994, - "grad_norm": 4.764699935913086, - "learning_rate": 2.2266423852378342e-05, - "loss": 0.5482, - "step": 188230 - }, - { - "epoch": 1.6641029721176117, - "grad_norm": 1.3461828231811523, - "learning_rate": 2.226495046470647e-05, - "loss": 0.6048, - "step": 188240 - }, - { - "epoch": 1.664191375377924, - "grad_norm": 4.535086154937744, - "learning_rate": 2.2263477077034602e-05, - "loss": 0.5348, - "step": 188250 - }, - { - "epoch": 1.6642797786382362, - "grad_norm": 3.120342254638672, - "learning_rate": 2.226200368936273e-05, - "loss": 0.52, - "step": 188260 - }, - { - "epoch": 1.6643681818985483, - "grad_norm": 3.6663310527801514, - "learning_rate": 2.226053030169086e-05, - "loss": 0.5827, - "step": 188270 - }, - { - "epoch": 1.6644565851588606, - "grad_norm": 8.960272789001465, - "learning_rate": 2.225905691401899e-05, - "loss": 0.5495, - "step": 188280 - }, - { - "epoch": 1.664544988419173, - "grad_norm": 1.650512456893921, - "learning_rate": 2.225758352634712e-05, - "loss": 0.5691, - "step": 188290 - }, - { - "epoch": 1.664633391679485, - "grad_norm": 6.905547142028809, - "learning_rate": 2.2256110138675247e-05, - "loss": 0.6997, - "step": 188300 - }, - { - "epoch": 1.6647217949397972, - "grad_norm": 11.614811897277832, - "learning_rate": 2.2254636751003376e-05, - "loss": 0.6876, - "step": 188310 - }, - { - "epoch": 1.6648101982001098, - "grad_norm": 4.543526649475098, - "learning_rate": 2.2253163363331508e-05, - "loss": 0.5165, - "step": 188320 - }, - { - "epoch": 1.664898601460422, - "grad_norm": 6.2200727462768555, - "learning_rate": 2.2251689975659636e-05, - "loss": 0.5396, - "step": 188330 - }, - { - "epoch": 1.664987004720734, - "grad_norm": 5.304297924041748, - "learning_rate": 2.2250216587987764e-05, - "loss": 0.4579, - "step": 188340 - }, - { - "epoch": 1.6650754079810464, - "grad_norm": 4.436426162719727, - "learning_rate": 2.2248743200315893e-05, - "loss": 0.6397, - "step": 188350 - }, - { - "epoch": 1.6651638112413587, - "grad_norm": 11.11939525604248, - "learning_rate": 2.2247269812644024e-05, - "loss": 0.6046, - "step": 188360 - }, - { - "epoch": 1.6652522145016708, - "grad_norm": 1.316219687461853, - "learning_rate": 2.2245796424972153e-05, - "loss": 0.4938, - "step": 188370 - }, - { - "epoch": 1.665340617761983, - "grad_norm": 1.542055606842041, - "learning_rate": 2.224432303730028e-05, - "loss": 0.5233, - "step": 188380 - }, - { - "epoch": 1.6654290210222953, - "grad_norm": 8.293159484863281, - "learning_rate": 2.2242849649628413e-05, - "loss": 0.4731, - "step": 188390 - }, - { - "epoch": 1.6655174242826076, - "grad_norm": 15.16063404083252, - "learning_rate": 2.224137626195654e-05, - "loss": 0.6341, - "step": 188400 - }, - { - "epoch": 1.6656058275429197, - "grad_norm": 1.1575562953948975, - "learning_rate": 2.223990287428467e-05, - "loss": 0.4586, - "step": 188410 - }, - { - "epoch": 1.6656942308032319, - "grad_norm": 1.761497974395752, - "learning_rate": 2.22384294866128e-05, - "loss": 0.5505, - "step": 188420 - }, - { - "epoch": 1.6657826340635442, - "grad_norm": 4.031102180480957, - "learning_rate": 2.223695609894093e-05, - "loss": 0.5952, - "step": 188430 - }, - { - "epoch": 1.6658710373238566, - "grad_norm": 1.2118767499923706, - "learning_rate": 2.2235482711269058e-05, - "loss": 0.5478, - "step": 188440 - }, - { - "epoch": 1.6659594405841687, - "grad_norm": 1.559574007987976, - "learning_rate": 2.223400932359719e-05, - "loss": 0.5723, - "step": 188450 - }, - { - "epoch": 1.666047843844481, - "grad_norm": 3.166830062866211, - "learning_rate": 2.2232535935925318e-05, - "loss": 0.66, - "step": 188460 - }, - { - "epoch": 1.6661362471047934, - "grad_norm": 2.521289110183716, - "learning_rate": 2.2231062548253446e-05, - "loss": 0.5163, - "step": 188470 - }, - { - "epoch": 1.6662246503651055, - "grad_norm": 3.942953109741211, - "learning_rate": 2.2229589160581578e-05, - "loss": 0.4564, - "step": 188480 - }, - { - "epoch": 1.6663130536254176, - "grad_norm": 2.9778642654418945, - "learning_rate": 2.2228115772909706e-05, - "loss": 0.6761, - "step": 188490 - }, - { - "epoch": 1.66640145688573, - "grad_norm": 2.649951696395874, - "learning_rate": 2.2226642385237835e-05, - "loss": 0.5729, - "step": 188500 - }, - { - "epoch": 1.6664898601460423, - "grad_norm": 1.2398520708084106, - "learning_rate": 2.2225168997565967e-05, - "loss": 0.5507, - "step": 188510 - }, - { - "epoch": 1.6665782634063544, - "grad_norm": 1.4003452062606812, - "learning_rate": 2.2223695609894095e-05, - "loss": 0.5047, - "step": 188520 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 6.24932861328125, - "learning_rate": 2.2222222222222223e-05, - "loss": 0.4166, - "step": 188530 - }, - { - "epoch": 1.6667550699269789, - "grad_norm": 2.1581618785858154, - "learning_rate": 2.2220748834550355e-05, - "loss": 0.7106, - "step": 188540 - }, - { - "epoch": 1.6668434731872912, - "grad_norm": 6.85422420501709, - "learning_rate": 2.2219275446878483e-05, - "loss": 0.6032, - "step": 188550 - }, - { - "epoch": 1.6669318764476033, - "grad_norm": 1.0539196729660034, - "learning_rate": 2.2217802059206612e-05, - "loss": 0.5044, - "step": 188560 - }, - { - "epoch": 1.6670202797079157, - "grad_norm": 1.972545862197876, - "learning_rate": 2.2216328671534743e-05, - "loss": 0.5386, - "step": 188570 - }, - { - "epoch": 1.667108682968228, - "grad_norm": 6.655059814453125, - "learning_rate": 2.2214855283862872e-05, - "loss": 0.5911, - "step": 188580 - }, - { - "epoch": 1.6671970862285401, - "grad_norm": 5.74491024017334, - "learning_rate": 2.2213381896191e-05, - "loss": 0.5598, - "step": 188590 - }, - { - "epoch": 1.6672854894888522, - "grad_norm": 1.4643230438232422, - "learning_rate": 2.221190850851913e-05, - "loss": 0.6169, - "step": 188600 - }, - { - "epoch": 1.6673738927491646, - "grad_norm": 2.5021920204162598, - "learning_rate": 2.221043512084726e-05, - "loss": 0.5406, - "step": 188610 - }, - { - "epoch": 1.667462296009477, - "grad_norm": 14.962636947631836, - "learning_rate": 2.220896173317539e-05, - "loss": 0.4999, - "step": 188620 - }, - { - "epoch": 1.667550699269789, - "grad_norm": 1.1757675409317017, - "learning_rate": 2.2207488345503517e-05, - "loss": 0.5597, - "step": 188630 - }, - { - "epoch": 1.6676391025301012, - "grad_norm": 3.5266504287719727, - "learning_rate": 2.2206014957831645e-05, - "loss": 0.5871, - "step": 188640 - }, - { - "epoch": 1.6677275057904135, - "grad_norm": 3.238380193710327, - "learning_rate": 2.2204541570159777e-05, - "loss": 0.5241, - "step": 188650 - }, - { - "epoch": 1.6678159090507259, - "grad_norm": 3.122004747390747, - "learning_rate": 2.2203068182487905e-05, - "loss": 0.6736, - "step": 188660 - }, - { - "epoch": 1.667904312311038, - "grad_norm": 1.0609664916992188, - "learning_rate": 2.2201594794816034e-05, - "loss": 0.6957, - "step": 188670 - }, - { - "epoch": 1.6679927155713503, - "grad_norm": 2.5533833503723145, - "learning_rate": 2.2200121407144166e-05, - "loss": 0.6652, - "step": 188680 - }, - { - "epoch": 1.6680811188316627, - "grad_norm": 3.396364450454712, - "learning_rate": 2.2198648019472294e-05, - "loss": 0.5739, - "step": 188690 - }, - { - "epoch": 1.6681695220919748, - "grad_norm": 14.063793182373047, - "learning_rate": 2.2197174631800422e-05, - "loss": 0.6358, - "step": 188700 - }, - { - "epoch": 1.668257925352287, - "grad_norm": 2.8764028549194336, - "learning_rate": 2.219570124412855e-05, - "loss": 0.5964, - "step": 188710 - }, - { - "epoch": 1.6683463286125992, - "grad_norm": 0.9605642557144165, - "learning_rate": 2.2194227856456682e-05, - "loss": 0.5841, - "step": 188720 - }, - { - "epoch": 1.6684347318729116, - "grad_norm": 2.392451047897339, - "learning_rate": 2.219275446878481e-05, - "loss": 0.5822, - "step": 188730 - }, - { - "epoch": 1.6685231351332237, - "grad_norm": 2.0240206718444824, - "learning_rate": 2.219128108111294e-05, - "loss": 0.5108, - "step": 188740 - }, - { - "epoch": 1.6686115383935358, - "grad_norm": 1.1215158700942993, - "learning_rate": 2.218980769344107e-05, - "loss": 0.4555, - "step": 188750 - }, - { - "epoch": 1.6686999416538482, - "grad_norm": 7.258119583129883, - "learning_rate": 2.21883343057692e-05, - "loss": 0.5292, - "step": 188760 - }, - { - "epoch": 1.6687883449141605, - "grad_norm": 2.3384623527526855, - "learning_rate": 2.2186860918097327e-05, - "loss": 0.6956, - "step": 188770 - }, - { - "epoch": 1.6688767481744726, - "grad_norm": 4.769532203674316, - "learning_rate": 2.2185387530425456e-05, - "loss": 0.6884, - "step": 188780 - }, - { - "epoch": 1.668965151434785, - "grad_norm": 1.6644866466522217, - "learning_rate": 2.2183914142753588e-05, - "loss": 0.7263, - "step": 188790 - }, - { - "epoch": 1.6690535546950973, - "grad_norm": 2.7710771560668945, - "learning_rate": 2.2182440755081716e-05, - "loss": 0.5692, - "step": 188800 - }, - { - "epoch": 1.6691419579554094, - "grad_norm": 1.0649518966674805, - "learning_rate": 2.2180967367409844e-05, - "loss": 0.6625, - "step": 188810 - }, - { - "epoch": 1.6692303612157215, - "grad_norm": 4.310955047607422, - "learning_rate": 2.2179493979737973e-05, - "loss": 0.5366, - "step": 188820 - }, - { - "epoch": 1.669318764476034, - "grad_norm": 6.253264904022217, - "learning_rate": 2.2178020592066104e-05, - "loss": 0.5959, - "step": 188830 - }, - { - "epoch": 1.6694071677363462, - "grad_norm": 0.9093913435935974, - "learning_rate": 2.2176547204394233e-05, - "loss": 0.5524, - "step": 188840 - }, - { - "epoch": 1.6694955709966584, - "grad_norm": 1.5057843923568726, - "learning_rate": 2.217507381672236e-05, - "loss": 0.585, - "step": 188850 - }, - { - "epoch": 1.6695839742569705, - "grad_norm": 4.095244407653809, - "learning_rate": 2.2173600429050493e-05, - "loss": 0.7318, - "step": 188860 - }, - { - "epoch": 1.6696723775172828, - "grad_norm": 3.4648289680480957, - "learning_rate": 2.217212704137862e-05, - "loss": 0.6113, - "step": 188870 - }, - { - "epoch": 1.6697607807775952, - "grad_norm": 5.554011821746826, - "learning_rate": 2.217065365370675e-05, - "loss": 0.5773, - "step": 188880 - }, - { - "epoch": 1.6698491840379073, - "grad_norm": 15.912015914916992, - "learning_rate": 2.2169180266034878e-05, - "loss": 0.6196, - "step": 188890 - }, - { - "epoch": 1.6699375872982194, - "grad_norm": 4.737215518951416, - "learning_rate": 2.216770687836301e-05, - "loss": 0.607, - "step": 188900 - }, - { - "epoch": 1.670025990558532, - "grad_norm": 2.334709644317627, - "learning_rate": 2.2166233490691138e-05, - "loss": 0.6417, - "step": 188910 - }, - { - "epoch": 1.670114393818844, - "grad_norm": 2.6589438915252686, - "learning_rate": 2.2164760103019266e-05, - "loss": 0.5442, - "step": 188920 - }, - { - "epoch": 1.6702027970791562, - "grad_norm": 0.9686906933784485, - "learning_rate": 2.2163286715347398e-05, - "loss": 0.548, - "step": 188930 - }, - { - "epoch": 1.6702912003394685, - "grad_norm": 2.9497954845428467, - "learning_rate": 2.2161813327675526e-05, - "loss": 0.5604, - "step": 188940 - }, - { - "epoch": 1.6703796035997809, - "grad_norm": 1.4715207815170288, - "learning_rate": 2.2160339940003655e-05, - "loss": 0.5868, - "step": 188950 - }, - { - "epoch": 1.670468006860093, - "grad_norm": 6.004668235778809, - "learning_rate": 2.2158866552331783e-05, - "loss": 0.6727, - "step": 188960 - }, - { - "epoch": 1.6705564101204051, - "grad_norm": 5.1338348388671875, - "learning_rate": 2.2157393164659915e-05, - "loss": 0.6981, - "step": 188970 - }, - { - "epoch": 1.6706448133807175, - "grad_norm": 2.2565853595733643, - "learning_rate": 2.2155919776988043e-05, - "loss": 0.5748, - "step": 188980 - }, - { - "epoch": 1.6707332166410298, - "grad_norm": 3.186152935028076, - "learning_rate": 2.215444638931617e-05, - "loss": 0.6113, - "step": 188990 - }, - { - "epoch": 1.670821619901342, - "grad_norm": 4.91539192199707, - "learning_rate": 2.21529730016443e-05, - "loss": 0.5266, - "step": 189000 - }, - { - "epoch": 1.670910023161654, - "grad_norm": 1.577486276626587, - "learning_rate": 2.2151499613972432e-05, - "loss": 0.4357, - "step": 189010 - }, - { - "epoch": 1.6709984264219664, - "grad_norm": 4.022982120513916, - "learning_rate": 2.215002622630056e-05, - "loss": 0.6327, - "step": 189020 - }, - { - "epoch": 1.6710868296822787, - "grad_norm": 1.7423595190048218, - "learning_rate": 2.214855283862869e-05, - "loss": 0.6096, - "step": 189030 - }, - { - "epoch": 1.6711752329425908, - "grad_norm": 11.674312591552734, - "learning_rate": 2.214707945095682e-05, - "loss": 0.573, - "step": 189040 - }, - { - "epoch": 1.6712636362029032, - "grad_norm": 1.4974020719528198, - "learning_rate": 2.214560606328495e-05, - "loss": 0.4743, - "step": 189050 - }, - { - "epoch": 1.6713520394632155, - "grad_norm": 2.0666520595550537, - "learning_rate": 2.2144132675613077e-05, - "loss": 0.7592, - "step": 189060 - }, - { - "epoch": 1.6714404427235277, - "grad_norm": 2.0698180198669434, - "learning_rate": 2.2142659287941205e-05, - "loss": 0.5692, - "step": 189070 - }, - { - "epoch": 1.6715288459838398, - "grad_norm": 4.766151428222656, - "learning_rate": 2.2141185900269337e-05, - "loss": 0.6432, - "step": 189080 - }, - { - "epoch": 1.6716172492441521, - "grad_norm": 2.26936411857605, - "learning_rate": 2.2139712512597465e-05, - "loss": 0.6432, - "step": 189090 - }, - { - "epoch": 1.6717056525044645, - "grad_norm": 8.839527130126953, - "learning_rate": 2.2138239124925594e-05, - "loss": 0.5871, - "step": 189100 - }, - { - "epoch": 1.6717940557647766, - "grad_norm": 7.770999908447266, - "learning_rate": 2.2136765737253722e-05, - "loss": 0.644, - "step": 189110 - }, - { - "epoch": 1.6718824590250887, - "grad_norm": 1.8214045763015747, - "learning_rate": 2.2135292349581854e-05, - "loss": 0.6545, - "step": 189120 - }, - { - "epoch": 1.671970862285401, - "grad_norm": 11.122117042541504, - "learning_rate": 2.2133818961909982e-05, - "loss": 0.4978, - "step": 189130 - }, - { - "epoch": 1.6720592655457134, - "grad_norm": 1.1969584226608276, - "learning_rate": 2.213234557423811e-05, - "loss": 0.7185, - "step": 189140 - }, - { - "epoch": 1.6721476688060255, - "grad_norm": 11.206798553466797, - "learning_rate": 2.2130872186566242e-05, - "loss": 0.5742, - "step": 189150 - }, - { - "epoch": 1.6722360720663378, - "grad_norm": 7.0461859703063965, - "learning_rate": 2.212939879889437e-05, - "loss": 0.6616, - "step": 189160 - }, - { - "epoch": 1.6723244753266502, - "grad_norm": 2.256686210632324, - "learning_rate": 2.21279254112225e-05, - "loss": 0.7487, - "step": 189170 - }, - { - "epoch": 1.6724128785869623, - "grad_norm": 3.1147313117980957, - "learning_rate": 2.2126452023550627e-05, - "loss": 0.5055, - "step": 189180 - }, - { - "epoch": 1.6725012818472744, - "grad_norm": 1.971746563911438, - "learning_rate": 2.212497863587876e-05, - "loss": 0.6966, - "step": 189190 - }, - { - "epoch": 1.6725896851075868, - "grad_norm": 3.3352856636047363, - "learning_rate": 2.2123505248206887e-05, - "loss": 0.6319, - "step": 189200 - }, - { - "epoch": 1.672678088367899, - "grad_norm": 1.1682038307189941, - "learning_rate": 2.2122031860535016e-05, - "loss": 0.5754, - "step": 189210 - }, - { - "epoch": 1.6727664916282112, - "grad_norm": 2.785252094268799, - "learning_rate": 2.2120558472863147e-05, - "loss": 0.5977, - "step": 189220 - }, - { - "epoch": 1.6728548948885233, - "grad_norm": 3.5565831661224365, - "learning_rate": 2.2119085085191276e-05, - "loss": 0.5839, - "step": 189230 - }, - { - "epoch": 1.6729432981488357, - "grad_norm": 3.4115993976593018, - "learning_rate": 2.2117611697519404e-05, - "loss": 0.6556, - "step": 189240 - }, - { - "epoch": 1.673031701409148, - "grad_norm": 4.680783271789551, - "learning_rate": 2.2116138309847533e-05, - "loss": 0.6052, - "step": 189250 - }, - { - "epoch": 1.6731201046694602, - "grad_norm": 1.8483526706695557, - "learning_rate": 2.2114664922175664e-05, - "loss": 0.6151, - "step": 189260 - }, - { - "epoch": 1.6732085079297725, - "grad_norm": 2.25266695022583, - "learning_rate": 2.2113191534503793e-05, - "loss": 0.6653, - "step": 189270 - }, - { - "epoch": 1.6732969111900848, - "grad_norm": 1.655000925064087, - "learning_rate": 2.211171814683192e-05, - "loss": 0.6066, - "step": 189280 - }, - { - "epoch": 1.673385314450397, - "grad_norm": 2.4040040969848633, - "learning_rate": 2.211024475916005e-05, - "loss": 0.5819, - "step": 189290 - }, - { - "epoch": 1.673473717710709, - "grad_norm": 2.0747737884521484, - "learning_rate": 2.210877137148818e-05, - "loss": 0.5092, - "step": 189300 - }, - { - "epoch": 1.6735621209710214, - "grad_norm": 4.702084064483643, - "learning_rate": 2.210729798381631e-05, - "loss": 0.5864, - "step": 189310 - }, - { - "epoch": 1.6736505242313338, - "grad_norm": 4.6156768798828125, - "learning_rate": 2.2105824596144438e-05, - "loss": 0.6299, - "step": 189320 - }, - { - "epoch": 1.6737389274916459, - "grad_norm": 5.976464748382568, - "learning_rate": 2.210435120847257e-05, - "loss": 0.7717, - "step": 189330 - }, - { - "epoch": 1.673827330751958, - "grad_norm": 1.6078767776489258, - "learning_rate": 2.2102877820800698e-05, - "loss": 0.547, - "step": 189340 - }, - { - "epoch": 1.6739157340122703, - "grad_norm": 3.873584747314453, - "learning_rate": 2.2101404433128826e-05, - "loss": 0.5983, - "step": 189350 - }, - { - "epoch": 1.6740041372725827, - "grad_norm": 1.2261841297149658, - "learning_rate": 2.2099931045456958e-05, - "loss": 0.6695, - "step": 189360 - }, - { - "epoch": 1.6740925405328948, - "grad_norm": 8.731363296508789, - "learning_rate": 2.2098457657785086e-05, - "loss": 0.6594, - "step": 189370 - }, - { - "epoch": 1.6741809437932071, - "grad_norm": 5.022148132324219, - "learning_rate": 2.2096984270113215e-05, - "loss": 0.6323, - "step": 189380 - }, - { - "epoch": 1.6742693470535195, - "grad_norm": 2.15146803855896, - "learning_rate": 2.2095510882441346e-05, - "loss": 0.5235, - "step": 189390 - }, - { - "epoch": 1.6743577503138316, - "grad_norm": 3.7636878490448, - "learning_rate": 2.2094037494769475e-05, - "loss": 0.601, - "step": 189400 - }, - { - "epoch": 1.6744461535741437, - "grad_norm": 2.561110496520996, - "learning_rate": 2.2092564107097603e-05, - "loss": 0.5239, - "step": 189410 - }, - { - "epoch": 1.674534556834456, - "grad_norm": 1.4131098985671997, - "learning_rate": 2.2091090719425735e-05, - "loss": 0.5494, - "step": 189420 - }, - { - "epoch": 1.6746229600947684, - "grad_norm": 0.7710692286491394, - "learning_rate": 2.2089617331753863e-05, - "loss": 0.5949, - "step": 189430 - }, - { - "epoch": 1.6747113633550805, - "grad_norm": 5.19439697265625, - "learning_rate": 2.208814394408199e-05, - "loss": 0.6324, - "step": 189440 - }, - { - "epoch": 1.6747997666153926, - "grad_norm": 1.2359737157821655, - "learning_rate": 2.2086670556410123e-05, - "loss": 0.5129, - "step": 189450 - }, - { - "epoch": 1.674888169875705, - "grad_norm": 2.959333658218384, - "learning_rate": 2.208519716873825e-05, - "loss": 0.5621, - "step": 189460 - }, - { - "epoch": 1.6749765731360173, - "grad_norm": 1.3144824504852295, - "learning_rate": 2.208372378106638e-05, - "loss": 0.7098, - "step": 189470 - }, - { - "epoch": 1.6750649763963295, - "grad_norm": 1.24237859249115, - "learning_rate": 2.2082250393394512e-05, - "loss": 0.5468, - "step": 189480 - }, - { - "epoch": 1.6751533796566416, - "grad_norm": 10.24328327178955, - "learning_rate": 2.208077700572264e-05, - "loss": 0.568, - "step": 189490 - }, - { - "epoch": 1.6752417829169541, - "grad_norm": 16.48509407043457, - "learning_rate": 2.207930361805077e-05, - "loss": 0.6656, - "step": 189500 - }, - { - "epoch": 1.6753301861772663, - "grad_norm": 8.129464149475098, - "learning_rate": 2.20778302303789e-05, - "loss": 0.5774, - "step": 189510 - }, - { - "epoch": 1.6754185894375784, - "grad_norm": 1.916213035583496, - "learning_rate": 2.207635684270703e-05, - "loss": 0.5001, - "step": 189520 - }, - { - "epoch": 1.6755069926978907, - "grad_norm": 1.5008715391159058, - "learning_rate": 2.2074883455035157e-05, - "loss": 0.6012, - "step": 189530 - }, - { - "epoch": 1.675595395958203, - "grad_norm": 3.7476067543029785, - "learning_rate": 2.2073410067363285e-05, - "loss": 0.6386, - "step": 189540 - }, - { - "epoch": 1.6756837992185152, - "grad_norm": 14.789105415344238, - "learning_rate": 2.2071936679691417e-05, - "loss": 0.5663, - "step": 189550 - }, - { - "epoch": 1.6757722024788273, - "grad_norm": 5.446702003479004, - "learning_rate": 2.2070463292019545e-05, - "loss": 0.6136, - "step": 189560 - }, - { - "epoch": 1.6758606057391396, - "grad_norm": 3.216346025466919, - "learning_rate": 2.2068989904347674e-05, - "loss": 0.5366, - "step": 189570 - }, - { - "epoch": 1.675949008999452, - "grad_norm": 8.363117218017578, - "learning_rate": 2.2067516516675805e-05, - "loss": 0.5766, - "step": 189580 - }, - { - "epoch": 1.676037412259764, - "grad_norm": 6.885417938232422, - "learning_rate": 2.2066043129003934e-05, - "loss": 0.492, - "step": 189590 - }, - { - "epoch": 1.6761258155200762, - "grad_norm": 5.379096984863281, - "learning_rate": 2.2064569741332062e-05, - "loss": 0.576, - "step": 189600 - }, - { - "epoch": 1.6762142187803888, - "grad_norm": 8.03687572479248, - "learning_rate": 2.206309635366019e-05, - "loss": 0.7067, - "step": 189610 - }, - { - "epoch": 1.676302622040701, - "grad_norm": 3.711484670639038, - "learning_rate": 2.2061622965988322e-05, - "loss": 0.6372, - "step": 189620 - }, - { - "epoch": 1.676391025301013, - "grad_norm": 11.158401489257812, - "learning_rate": 2.206014957831645e-05, - "loss": 0.6165, - "step": 189630 - }, - { - "epoch": 1.6764794285613254, - "grad_norm": 2.3779067993164062, - "learning_rate": 2.205867619064458e-05, - "loss": 0.5788, - "step": 189640 - }, - { - "epoch": 1.6765678318216377, - "grad_norm": 9.3578462600708, - "learning_rate": 2.2057202802972707e-05, - "loss": 0.5638, - "step": 189650 - }, - { - "epoch": 1.6766562350819498, - "grad_norm": 4.641378402709961, - "learning_rate": 2.205572941530084e-05, - "loss": 0.5555, - "step": 189660 - }, - { - "epoch": 1.676744638342262, - "grad_norm": 2.9508461952209473, - "learning_rate": 2.2054256027628967e-05, - "loss": 0.6674, - "step": 189670 - }, - { - "epoch": 1.6768330416025743, - "grad_norm": 1.9657361507415771, - "learning_rate": 2.2052782639957096e-05, - "loss": 0.6882, - "step": 189680 - }, - { - "epoch": 1.6769214448628866, - "grad_norm": 2.3473188877105713, - "learning_rate": 2.2051309252285228e-05, - "loss": 0.5233, - "step": 189690 - }, - { - "epoch": 1.6770098481231988, - "grad_norm": 0.8163551688194275, - "learning_rate": 2.2049835864613356e-05, - "loss": 0.5934, - "step": 189700 - }, - { - "epoch": 1.6770982513835109, - "grad_norm": 5.901861667633057, - "learning_rate": 2.2048362476941484e-05, - "loss": 0.5388, - "step": 189710 - }, - { - "epoch": 1.6771866546438232, - "grad_norm": 2.464370012283325, - "learning_rate": 2.2046889089269613e-05, - "loss": 0.6292, - "step": 189720 - }, - { - "epoch": 1.6772750579041356, - "grad_norm": 3.0845062732696533, - "learning_rate": 2.2045415701597744e-05, - "loss": 0.7139, - "step": 189730 - }, - { - "epoch": 1.6773634611644477, - "grad_norm": 3.8142809867858887, - "learning_rate": 2.2043942313925873e-05, - "loss": 0.6596, - "step": 189740 - }, - { - "epoch": 1.67745186442476, - "grad_norm": 6.545985221862793, - "learning_rate": 2.2042468926254e-05, - "loss": 0.4766, - "step": 189750 - }, - { - "epoch": 1.6775402676850724, - "grad_norm": 2.106898069381714, - "learning_rate": 2.204099553858213e-05, - "loss": 0.6604, - "step": 189760 - }, - { - "epoch": 1.6776286709453845, - "grad_norm": 8.585613250732422, - "learning_rate": 2.203952215091026e-05, - "loss": 0.5835, - "step": 189770 - }, - { - "epoch": 1.6777170742056966, - "grad_norm": 0.9737794995307922, - "learning_rate": 2.203804876323839e-05, - "loss": 0.6715, - "step": 189780 - }, - { - "epoch": 1.677805477466009, - "grad_norm": 2.4751617908477783, - "learning_rate": 2.2036575375566518e-05, - "loss": 0.5614, - "step": 189790 - }, - { - "epoch": 1.6778938807263213, - "grad_norm": 8.864995956420898, - "learning_rate": 2.203510198789465e-05, - "loss": 0.6392, - "step": 189800 - }, - { - "epoch": 1.6779822839866334, - "grad_norm": 10.63521671295166, - "learning_rate": 2.2033628600222778e-05, - "loss": 0.6142, - "step": 189810 - }, - { - "epoch": 1.6780706872469455, - "grad_norm": 1.2009896039962769, - "learning_rate": 2.2032155212550906e-05, - "loss": 0.6091, - "step": 189820 - }, - { - "epoch": 1.6781590905072579, - "grad_norm": 1.397385835647583, - "learning_rate": 2.2030681824879035e-05, - "loss": 0.7609, - "step": 189830 - }, - { - "epoch": 1.6782474937675702, - "grad_norm": 2.633423328399658, - "learning_rate": 2.2029208437207166e-05, - "loss": 0.5759, - "step": 189840 - }, - { - "epoch": 1.6783358970278823, - "grad_norm": 3.4063544273376465, - "learning_rate": 2.2027735049535295e-05, - "loss": 0.5684, - "step": 189850 - }, - { - "epoch": 1.6784243002881947, - "grad_norm": 10.041712760925293, - "learning_rate": 2.2026261661863423e-05, - "loss": 0.6201, - "step": 189860 - }, - { - "epoch": 1.678512703548507, - "grad_norm": 4.14884090423584, - "learning_rate": 2.2024788274191555e-05, - "loss": 0.5453, - "step": 189870 - }, - { - "epoch": 1.6786011068088191, - "grad_norm": 6.366163730621338, - "learning_rate": 2.2023314886519683e-05, - "loss": 0.6861, - "step": 189880 - }, - { - "epoch": 1.6786895100691313, - "grad_norm": 2.5637083053588867, - "learning_rate": 2.202184149884781e-05, - "loss": 0.6522, - "step": 189890 - }, - { - "epoch": 1.6787779133294436, - "grad_norm": 1.2532386779785156, - "learning_rate": 2.202036811117594e-05, - "loss": 0.6338, - "step": 189900 - }, - { - "epoch": 1.678866316589756, - "grad_norm": 4.722858428955078, - "learning_rate": 2.201889472350407e-05, - "loss": 0.5245, - "step": 189910 - }, - { - "epoch": 1.678954719850068, - "grad_norm": 3.1113734245300293, - "learning_rate": 2.20174213358322e-05, - "loss": 0.6467, - "step": 189920 - }, - { - "epoch": 1.6790431231103802, - "grad_norm": 1.4831795692443848, - "learning_rate": 2.201594794816033e-05, - "loss": 0.5092, - "step": 189930 - }, - { - "epoch": 1.6791315263706925, - "grad_norm": 3.9340319633483887, - "learning_rate": 2.2014474560488457e-05, - "loss": 0.6271, - "step": 189940 - }, - { - "epoch": 1.6792199296310049, - "grad_norm": 1.2145519256591797, - "learning_rate": 2.201300117281659e-05, - "loss": 0.5553, - "step": 189950 - }, - { - "epoch": 1.679308332891317, - "grad_norm": 1.2873629331588745, - "learning_rate": 2.2011527785144717e-05, - "loss": 0.6549, - "step": 189960 - }, - { - "epoch": 1.6793967361516293, - "grad_norm": 1.677676796913147, - "learning_rate": 2.2010054397472845e-05, - "loss": 0.5991, - "step": 189970 - }, - { - "epoch": 1.6794851394119417, - "grad_norm": 2.600574254989624, - "learning_rate": 2.2008581009800977e-05, - "loss": 0.5597, - "step": 189980 - }, - { - "epoch": 1.6795735426722538, - "grad_norm": 3.347747564315796, - "learning_rate": 2.2007107622129105e-05, - "loss": 0.6057, - "step": 189990 - }, - { - "epoch": 1.679661945932566, - "grad_norm": 1.7320047616958618, - "learning_rate": 2.2005634234457234e-05, - "loss": 0.6227, - "step": 190000 - }, - { - "epoch": 1.6797503491928782, - "grad_norm": 3.8314831256866455, - "learning_rate": 2.2004160846785362e-05, - "loss": 0.6426, - "step": 190010 - }, - { - "epoch": 1.6798387524531906, - "grad_norm": 4.242805004119873, - "learning_rate": 2.2002687459113494e-05, - "loss": 0.6173, - "step": 190020 - }, - { - "epoch": 1.6799271557135027, - "grad_norm": 1.511187195777893, - "learning_rate": 2.2001214071441622e-05, - "loss": 0.5629, - "step": 190030 - }, - { - "epoch": 1.6800155589738148, - "grad_norm": 2.0220277309417725, - "learning_rate": 2.199974068376975e-05, - "loss": 0.5588, - "step": 190040 - }, - { - "epoch": 1.6801039622341272, - "grad_norm": 4.622857570648193, - "learning_rate": 2.1998267296097882e-05, - "loss": 0.5901, - "step": 190050 - }, - { - "epoch": 1.6801923654944395, - "grad_norm": 2.2772791385650635, - "learning_rate": 2.199679390842601e-05, - "loss": 0.6768, - "step": 190060 - }, - { - "epoch": 1.6802807687547516, - "grad_norm": 9.772955894470215, - "learning_rate": 2.199532052075414e-05, - "loss": 0.559, - "step": 190070 - }, - { - "epoch": 1.6803691720150638, - "grad_norm": 3.5287930965423584, - "learning_rate": 2.1993847133082267e-05, - "loss": 0.6372, - "step": 190080 - }, - { - "epoch": 1.6804575752753763, - "grad_norm": 3.7590224742889404, - "learning_rate": 2.19923737454104e-05, - "loss": 0.7284, - "step": 190090 - }, - { - "epoch": 1.6805459785356884, - "grad_norm": 10.513998031616211, - "learning_rate": 2.1990900357738527e-05, - "loss": 0.5029, - "step": 190100 - }, - { - "epoch": 1.6806343817960006, - "grad_norm": 1.8067030906677246, - "learning_rate": 2.1989426970066656e-05, - "loss": 0.7068, - "step": 190110 - }, - { - "epoch": 1.680722785056313, - "grad_norm": 10.699889183044434, - "learning_rate": 2.1987953582394784e-05, - "loss": 0.5368, - "step": 190120 - }, - { - "epoch": 1.6808111883166252, - "grad_norm": 10.229439735412598, - "learning_rate": 2.1986480194722916e-05, - "loss": 0.5624, - "step": 190130 - }, - { - "epoch": 1.6808995915769374, - "grad_norm": 1.7228556871414185, - "learning_rate": 2.1985006807051044e-05, - "loss": 0.6638, - "step": 190140 - }, - { - "epoch": 1.6809879948372495, - "grad_norm": 2.5064215660095215, - "learning_rate": 2.1983533419379172e-05, - "loss": 0.5796, - "step": 190150 - }, - { - "epoch": 1.6810763980975618, - "grad_norm": 3.2687125205993652, - "learning_rate": 2.1982060031707304e-05, - "loss": 0.6425, - "step": 190160 - }, - { - "epoch": 1.6811648013578742, - "grad_norm": 2.8402090072631836, - "learning_rate": 2.1980586644035433e-05, - "loss": 0.5586, - "step": 190170 - }, - { - "epoch": 1.6812532046181863, - "grad_norm": 5.387648582458496, - "learning_rate": 2.197911325636356e-05, - "loss": 0.6745, - "step": 190180 - }, - { - "epoch": 1.6813416078784984, - "grad_norm": 1.676777958869934, - "learning_rate": 2.197763986869169e-05, - "loss": 0.5778, - "step": 190190 - }, - { - "epoch": 1.681430011138811, - "grad_norm": 6.165497779846191, - "learning_rate": 2.197616648101982e-05, - "loss": 0.5702, - "step": 190200 - }, - { - "epoch": 1.681518414399123, - "grad_norm": 1.4563077688217163, - "learning_rate": 2.197469309334795e-05, - "loss": 0.5723, - "step": 190210 - }, - { - "epoch": 1.6816068176594352, - "grad_norm": 2.4805495738983154, - "learning_rate": 2.1973219705676078e-05, - "loss": 0.5249, - "step": 190220 - }, - { - "epoch": 1.6816952209197475, - "grad_norm": 2.617976188659668, - "learning_rate": 2.1971746318004206e-05, - "loss": 0.7356, - "step": 190230 - }, - { - "epoch": 1.6817836241800599, - "grad_norm": 1.9330956935882568, - "learning_rate": 2.1970272930332338e-05, - "loss": 0.6424, - "step": 190240 - }, - { - "epoch": 1.681872027440372, - "grad_norm": 11.002070426940918, - "learning_rate": 2.1968799542660466e-05, - "loss": 0.6507, - "step": 190250 - }, - { - "epoch": 1.6819604307006841, - "grad_norm": 1.3374661207199097, - "learning_rate": 2.1967326154988595e-05, - "loss": 0.5391, - "step": 190260 - }, - { - "epoch": 1.6820488339609965, - "grad_norm": 3.5891616344451904, - "learning_rate": 2.1965852767316726e-05, - "loss": 0.4846, - "step": 190270 - }, - { - "epoch": 1.6821372372213088, - "grad_norm": 1.4183052778244019, - "learning_rate": 2.1964379379644855e-05, - "loss": 0.5757, - "step": 190280 - }, - { - "epoch": 1.682225640481621, - "grad_norm": 9.315398216247559, - "learning_rate": 2.1962905991972983e-05, - "loss": 0.5603, - "step": 190290 - }, - { - "epoch": 1.682314043741933, - "grad_norm": 9.497298240661621, - "learning_rate": 2.1961432604301115e-05, - "loss": 0.6526, - "step": 190300 - }, - { - "epoch": 1.6824024470022454, - "grad_norm": 1.1867121458053589, - "learning_rate": 2.1959959216629243e-05, - "loss": 0.6201, - "step": 190310 - }, - { - "epoch": 1.6824908502625577, - "grad_norm": 9.748201370239258, - "learning_rate": 2.195848582895737e-05, - "loss": 0.4557, - "step": 190320 - }, - { - "epoch": 1.6825792535228699, - "grad_norm": 5.4763336181640625, - "learning_rate": 2.1957012441285503e-05, - "loss": 0.5302, - "step": 190330 - }, - { - "epoch": 1.6826676567831822, - "grad_norm": 2.238471031188965, - "learning_rate": 2.195553905361363e-05, - "loss": 0.4887, - "step": 190340 - }, - { - "epoch": 1.6827560600434945, - "grad_norm": 2.178219795227051, - "learning_rate": 2.195406566594176e-05, - "loss": 0.6582, - "step": 190350 - }, - { - "epoch": 1.6828444633038067, - "grad_norm": 1.5363105535507202, - "learning_rate": 2.195259227826989e-05, - "loss": 0.6112, - "step": 190360 - }, - { - "epoch": 1.6829328665641188, - "grad_norm": 8.312849044799805, - "learning_rate": 2.195111889059802e-05, - "loss": 0.5893, - "step": 190370 - }, - { - "epoch": 1.6830212698244311, - "grad_norm": 2.584324359893799, - "learning_rate": 2.194964550292615e-05, - "loss": 0.7375, - "step": 190380 - }, - { - "epoch": 1.6831096730847435, - "grad_norm": 1.2932301759719849, - "learning_rate": 2.194817211525428e-05, - "loss": 0.5513, - "step": 190390 - }, - { - "epoch": 1.6831980763450556, - "grad_norm": 2.2087790966033936, - "learning_rate": 2.194669872758241e-05, - "loss": 0.4637, - "step": 190400 - }, - { - "epoch": 1.6832864796053677, - "grad_norm": 3.2174506187438965, - "learning_rate": 2.1945225339910537e-05, - "loss": 0.6108, - "step": 190410 - }, - { - "epoch": 1.68337488286568, - "grad_norm": 15.741214752197266, - "learning_rate": 2.194375195223867e-05, - "loss": 0.56, - "step": 190420 - }, - { - "epoch": 1.6834632861259924, - "grad_norm": 4.203559398651123, - "learning_rate": 2.1942278564566797e-05, - "loss": 0.5673, - "step": 190430 - }, - { - "epoch": 1.6835516893863045, - "grad_norm": 13.096375465393066, - "learning_rate": 2.1940805176894925e-05, - "loss": 0.5454, - "step": 190440 - }, - { - "epoch": 1.6836400926466168, - "grad_norm": 4.456557750701904, - "learning_rate": 2.1939331789223057e-05, - "loss": 0.7474, - "step": 190450 - }, - { - "epoch": 1.6837284959069292, - "grad_norm": 3.961533784866333, - "learning_rate": 2.1937858401551185e-05, - "loss": 0.5512, - "step": 190460 - }, - { - "epoch": 1.6838168991672413, - "grad_norm": 14.67947769165039, - "learning_rate": 2.1936385013879314e-05, - "loss": 0.652, - "step": 190470 - }, - { - "epoch": 1.6839053024275534, - "grad_norm": 1.3641750812530518, - "learning_rate": 2.1934911626207442e-05, - "loss": 0.6523, - "step": 190480 - }, - { - "epoch": 1.6839937056878658, - "grad_norm": 2.07220458984375, - "learning_rate": 2.1933438238535574e-05, - "loss": 0.4864, - "step": 190490 - }, - { - "epoch": 1.6840821089481781, - "grad_norm": 1.1580243110656738, - "learning_rate": 2.1931964850863702e-05, - "loss": 0.5769, - "step": 190500 - }, - { - "epoch": 1.6841705122084902, - "grad_norm": 1.169252634048462, - "learning_rate": 2.193049146319183e-05, - "loss": 0.5285, - "step": 190510 - }, - { - "epoch": 1.6842589154688024, - "grad_norm": 1.5489351749420166, - "learning_rate": 2.1929018075519962e-05, - "loss": 0.5721, - "step": 190520 - }, - { - "epoch": 1.6843473187291147, - "grad_norm": 1.0660382509231567, - "learning_rate": 2.192754468784809e-05, - "loss": 0.5511, - "step": 190530 - }, - { - "epoch": 1.684435721989427, - "grad_norm": 14.691634178161621, - "learning_rate": 2.192607130017622e-05, - "loss": 0.5029, - "step": 190540 - }, - { - "epoch": 1.6845241252497392, - "grad_norm": 2.0073564052581787, - "learning_rate": 2.1924597912504347e-05, - "loss": 0.5478, - "step": 190550 - }, - { - "epoch": 1.6846125285100515, - "grad_norm": 1.3661534786224365, - "learning_rate": 2.192312452483248e-05, - "loss": 0.67, - "step": 190560 - }, - { - "epoch": 1.6847009317703638, - "grad_norm": 6.212716102600098, - "learning_rate": 2.1921651137160607e-05, - "loss": 0.6697, - "step": 190570 - }, - { - "epoch": 1.684789335030676, - "grad_norm": 4.82363748550415, - "learning_rate": 2.1920177749488736e-05, - "loss": 0.5786, - "step": 190580 - }, - { - "epoch": 1.684877738290988, - "grad_norm": 4.08050012588501, - "learning_rate": 2.1918704361816864e-05, - "loss": 0.6144, - "step": 190590 - }, - { - "epoch": 1.6849661415513004, - "grad_norm": 8.38548469543457, - "learning_rate": 2.1917230974144996e-05, - "loss": 0.6661, - "step": 190600 - }, - { - "epoch": 1.6850545448116128, - "grad_norm": 2.0309576988220215, - "learning_rate": 2.1915757586473124e-05, - "loss": 0.5592, - "step": 190610 - }, - { - "epoch": 1.6851429480719249, - "grad_norm": 8.065064430236816, - "learning_rate": 2.1914284198801253e-05, - "loss": 0.5105, - "step": 190620 - }, - { - "epoch": 1.685231351332237, - "grad_norm": 3.297581434249878, - "learning_rate": 2.1912810811129384e-05, - "loss": 0.6416, - "step": 190630 - }, - { - "epoch": 1.6853197545925493, - "grad_norm": 1.1886439323425293, - "learning_rate": 2.1911337423457513e-05, - "loss": 0.6973, - "step": 190640 - }, - { - "epoch": 1.6854081578528617, - "grad_norm": 3.1800239086151123, - "learning_rate": 2.190986403578564e-05, - "loss": 0.6965, - "step": 190650 - }, - { - "epoch": 1.6854965611131738, - "grad_norm": 2.357185125350952, - "learning_rate": 2.190839064811377e-05, - "loss": 0.4668, - "step": 190660 - }, - { - "epoch": 1.685584964373486, - "grad_norm": 2.388274669647217, - "learning_rate": 2.19069172604419e-05, - "loss": 0.6018, - "step": 190670 - }, - { - "epoch": 1.6856733676337985, - "grad_norm": 6.639049053192139, - "learning_rate": 2.190544387277003e-05, - "loss": 0.7029, - "step": 190680 - }, - { - "epoch": 1.6857617708941106, - "grad_norm": 4.946562767028809, - "learning_rate": 2.1903970485098158e-05, - "loss": 0.5823, - "step": 190690 - }, - { - "epoch": 1.6858501741544227, - "grad_norm": 1.82542085647583, - "learning_rate": 2.1902497097426286e-05, - "loss": 0.5313, - "step": 190700 - }, - { - "epoch": 1.685938577414735, - "grad_norm": 2.4510159492492676, - "learning_rate": 2.1901023709754418e-05, - "loss": 0.5712, - "step": 190710 - }, - { - "epoch": 1.6860269806750474, - "grad_norm": 1.0207104682922363, - "learning_rate": 2.1899550322082546e-05, - "loss": 0.4856, - "step": 190720 - }, - { - "epoch": 1.6861153839353595, - "grad_norm": 3.2885923385620117, - "learning_rate": 2.1898076934410675e-05, - "loss": 0.5497, - "step": 190730 - }, - { - "epoch": 1.6862037871956717, - "grad_norm": 9.004790306091309, - "learning_rate": 2.1896603546738806e-05, - "loss": 0.5325, - "step": 190740 - }, - { - "epoch": 1.686292190455984, - "grad_norm": 4.0741119384765625, - "learning_rate": 2.1895130159066935e-05, - "loss": 0.5682, - "step": 190750 - }, - { - "epoch": 1.6863805937162963, - "grad_norm": 1.089857578277588, - "learning_rate": 2.1893656771395063e-05, - "loss": 0.5986, - "step": 190760 - }, - { - "epoch": 1.6864689969766085, - "grad_norm": 5.030314922332764, - "learning_rate": 2.189218338372319e-05, - "loss": 0.6099, - "step": 190770 - }, - { - "epoch": 1.6865574002369206, - "grad_norm": 3.630523204803467, - "learning_rate": 2.1890709996051323e-05, - "loss": 0.4763, - "step": 190780 - }, - { - "epoch": 1.6866458034972331, - "grad_norm": 1.2105838060379028, - "learning_rate": 2.188923660837945e-05, - "loss": 0.626, - "step": 190790 - }, - { - "epoch": 1.6867342067575453, - "grad_norm": 3.044017791748047, - "learning_rate": 2.188776322070758e-05, - "loss": 0.595, - "step": 190800 - }, - { - "epoch": 1.6868226100178574, - "grad_norm": 3.25032639503479, - "learning_rate": 2.188628983303571e-05, - "loss": 0.7181, - "step": 190810 - }, - { - "epoch": 1.6869110132781697, - "grad_norm": 2.1563756465911865, - "learning_rate": 2.188481644536384e-05, - "loss": 0.4322, - "step": 190820 - }, - { - "epoch": 1.686999416538482, - "grad_norm": 3.811094045639038, - "learning_rate": 2.188334305769197e-05, - "loss": 0.4748, - "step": 190830 - }, - { - "epoch": 1.6870878197987942, - "grad_norm": 5.4308648109436035, - "learning_rate": 2.1881869670020097e-05, - "loss": 0.5897, - "step": 190840 - }, - { - "epoch": 1.6871762230591063, - "grad_norm": 1.9380451440811157, - "learning_rate": 2.188039628234823e-05, - "loss": 0.5574, - "step": 190850 - }, - { - "epoch": 1.6872646263194186, - "grad_norm": 1.8837717771530151, - "learning_rate": 2.1878922894676357e-05, - "loss": 0.6488, - "step": 190860 - }, - { - "epoch": 1.687353029579731, - "grad_norm": 2.0130834579467773, - "learning_rate": 2.1877449507004485e-05, - "loss": 0.627, - "step": 190870 - }, - { - "epoch": 1.687441432840043, - "grad_norm": 1.5990207195281982, - "learning_rate": 2.1875976119332613e-05, - "loss": 0.5575, - "step": 190880 - }, - { - "epoch": 1.6875298361003552, - "grad_norm": 1.9707977771759033, - "learning_rate": 2.1874502731660745e-05, - "loss": 0.5864, - "step": 190890 - }, - { - "epoch": 1.6876182393606676, - "grad_norm": 3.9532060623168945, - "learning_rate": 2.1873029343988874e-05, - "loss": 0.6636, - "step": 190900 - }, - { - "epoch": 1.68770664262098, - "grad_norm": 2.506739854812622, - "learning_rate": 2.1871555956317002e-05, - "loss": 0.5359, - "step": 190910 - }, - { - "epoch": 1.687795045881292, - "grad_norm": 2.4119558334350586, - "learning_rate": 2.1870082568645134e-05, - "loss": 0.6798, - "step": 190920 - }, - { - "epoch": 1.6878834491416044, - "grad_norm": 4.066296100616455, - "learning_rate": 2.1868609180973262e-05, - "loss": 0.6281, - "step": 190930 - }, - { - "epoch": 1.6879718524019167, - "grad_norm": 3.9416284561157227, - "learning_rate": 2.186713579330139e-05, - "loss": 0.6315, - "step": 190940 - }, - { - "epoch": 1.6880602556622288, - "grad_norm": 2.1455953121185303, - "learning_rate": 2.186566240562952e-05, - "loss": 0.6335, - "step": 190950 - }, - { - "epoch": 1.688148658922541, - "grad_norm": 3.3208320140838623, - "learning_rate": 2.186418901795765e-05, - "loss": 0.5828, - "step": 190960 - }, - { - "epoch": 1.6882370621828533, - "grad_norm": 1.930897831916809, - "learning_rate": 2.186271563028578e-05, - "loss": 0.6321, - "step": 190970 - }, - { - "epoch": 1.6883254654431656, - "grad_norm": 3.0473833084106445, - "learning_rate": 2.1861242242613907e-05, - "loss": 0.6549, - "step": 190980 - }, - { - "epoch": 1.6884138687034778, - "grad_norm": 7.404590606689453, - "learning_rate": 2.185976885494204e-05, - "loss": 0.6192, - "step": 190990 - }, - { - "epoch": 1.6885022719637899, - "grad_norm": 1.7399927377700806, - "learning_rate": 2.1858295467270167e-05, - "loss": 0.6563, - "step": 191000 - }, - { - "epoch": 1.6885906752241022, - "grad_norm": 11.48447036743164, - "learning_rate": 2.1856822079598296e-05, - "loss": 0.5461, - "step": 191010 - }, - { - "epoch": 1.6886790784844146, - "grad_norm": 1.321085810661316, - "learning_rate": 2.1855348691926424e-05, - "loss": 0.5534, - "step": 191020 - }, - { - "epoch": 1.6887674817447267, - "grad_norm": 4.213069915771484, - "learning_rate": 2.1853875304254556e-05, - "loss": 0.5374, - "step": 191030 - }, - { - "epoch": 1.688855885005039, - "grad_norm": 1.8781251907348633, - "learning_rate": 2.1852401916582684e-05, - "loss": 0.5992, - "step": 191040 - }, - { - "epoch": 1.6889442882653514, - "grad_norm": 15.161416053771973, - "learning_rate": 2.1850928528910812e-05, - "loss": 0.5349, - "step": 191050 - }, - { - "epoch": 1.6890326915256635, - "grad_norm": 11.546005249023438, - "learning_rate": 2.184945514123894e-05, - "loss": 0.6534, - "step": 191060 - }, - { - "epoch": 1.6891210947859756, - "grad_norm": 7.424916744232178, - "learning_rate": 2.1847981753567073e-05, - "loss": 0.6056, - "step": 191070 - }, - { - "epoch": 1.689209498046288, - "grad_norm": 2.070619821548462, - "learning_rate": 2.18465083658952e-05, - "loss": 0.5307, - "step": 191080 - }, - { - "epoch": 1.6892979013066003, - "grad_norm": 0.9939049482345581, - "learning_rate": 2.184503497822333e-05, - "loss": 0.5588, - "step": 191090 - }, - { - "epoch": 1.6893863045669124, - "grad_norm": 4.085865020751953, - "learning_rate": 2.184356159055146e-05, - "loss": 0.5902, - "step": 191100 - }, - { - "epoch": 1.6894747078272245, - "grad_norm": 2.3851819038391113, - "learning_rate": 2.184208820287959e-05, - "loss": 0.5819, - "step": 191110 - }, - { - "epoch": 1.6895631110875369, - "grad_norm": 0.5734080076217651, - "learning_rate": 2.1840614815207718e-05, - "loss": 0.5529, - "step": 191120 - }, - { - "epoch": 1.6896515143478492, - "grad_norm": 3.099874258041382, - "learning_rate": 2.1839141427535846e-05, - "loss": 0.5503, - "step": 191130 - }, - { - "epoch": 1.6897399176081613, - "grad_norm": 1.7946574687957764, - "learning_rate": 2.1837668039863978e-05, - "loss": 0.7642, - "step": 191140 - }, - { - "epoch": 1.6898283208684737, - "grad_norm": 3.9887897968292236, - "learning_rate": 2.1836194652192106e-05, - "loss": 0.487, - "step": 191150 - }, - { - "epoch": 1.689916724128786, - "grad_norm": 2.7381110191345215, - "learning_rate": 2.1834721264520234e-05, - "loss": 0.704, - "step": 191160 - }, - { - "epoch": 1.6900051273890981, - "grad_norm": 2.4239556789398193, - "learning_rate": 2.1833247876848366e-05, - "loss": 0.5848, - "step": 191170 - }, - { - "epoch": 1.6900935306494103, - "grad_norm": 2.926347494125366, - "learning_rate": 2.1831774489176495e-05, - "loss": 0.4912, - "step": 191180 - }, - { - "epoch": 1.6901819339097226, - "grad_norm": 3.046971082687378, - "learning_rate": 2.1830301101504623e-05, - "loss": 0.5571, - "step": 191190 - }, - { - "epoch": 1.690270337170035, - "grad_norm": 2.106529951095581, - "learning_rate": 2.182882771383275e-05, - "loss": 0.4651, - "step": 191200 - }, - { - "epoch": 1.690358740430347, - "grad_norm": 2.125847578048706, - "learning_rate": 2.1827354326160883e-05, - "loss": 0.6046, - "step": 191210 - }, - { - "epoch": 1.6904471436906592, - "grad_norm": 1.7945876121520996, - "learning_rate": 2.182588093848901e-05, - "loss": 0.4768, - "step": 191220 - }, - { - "epoch": 1.6905355469509715, - "grad_norm": 5.428643703460693, - "learning_rate": 2.182440755081714e-05, - "loss": 0.5581, - "step": 191230 - }, - { - "epoch": 1.6906239502112839, - "grad_norm": 1.877885103225708, - "learning_rate": 2.182293416314527e-05, - "loss": 0.5371, - "step": 191240 - }, - { - "epoch": 1.690712353471596, - "grad_norm": 2.7024946212768555, - "learning_rate": 2.18214607754734e-05, - "loss": 0.6356, - "step": 191250 - }, - { - "epoch": 1.6908007567319083, - "grad_norm": 4.930706977844238, - "learning_rate": 2.1819987387801528e-05, - "loss": 0.5158, - "step": 191260 - }, - { - "epoch": 1.6908891599922207, - "grad_norm": 2.5163559913635254, - "learning_rate": 2.181851400012966e-05, - "loss": 0.6207, - "step": 191270 - }, - { - "epoch": 1.6909775632525328, - "grad_norm": 1.711630940437317, - "learning_rate": 2.1817040612457788e-05, - "loss": 0.5904, - "step": 191280 - }, - { - "epoch": 1.691065966512845, - "grad_norm": 7.230292320251465, - "learning_rate": 2.1815567224785917e-05, - "loss": 0.6343, - "step": 191290 - }, - { - "epoch": 1.6911543697731573, - "grad_norm": 4.234020709991455, - "learning_rate": 2.181409383711405e-05, - "loss": 0.583, - "step": 191300 - }, - { - "epoch": 1.6912427730334696, - "grad_norm": 2.0680196285247803, - "learning_rate": 2.1812620449442177e-05, - "loss": 0.679, - "step": 191310 - }, - { - "epoch": 1.6913311762937817, - "grad_norm": 2.2695934772491455, - "learning_rate": 2.1811147061770305e-05, - "loss": 0.5432, - "step": 191320 - }, - { - "epoch": 1.6914195795540938, - "grad_norm": 2.44629168510437, - "learning_rate": 2.1809673674098437e-05, - "loss": 0.6198, - "step": 191330 - }, - { - "epoch": 1.6915079828144062, - "grad_norm": 2.11736798286438, - "learning_rate": 2.1808200286426565e-05, - "loss": 0.5626, - "step": 191340 - }, - { - "epoch": 1.6915963860747185, - "grad_norm": 1.8716286420822144, - "learning_rate": 2.1806726898754694e-05, - "loss": 0.544, - "step": 191350 - }, - { - "epoch": 1.6916847893350306, - "grad_norm": 4.241274833679199, - "learning_rate": 2.1805253511082825e-05, - "loss": 0.3895, - "step": 191360 - }, - { - "epoch": 1.6917731925953428, - "grad_norm": 2.03879451751709, - "learning_rate": 2.1803780123410954e-05, - "loss": 0.5302, - "step": 191370 - }, - { - "epoch": 1.6918615958556553, - "grad_norm": 1.6585477590560913, - "learning_rate": 2.1802306735739082e-05, - "loss": 0.5525, - "step": 191380 - }, - { - "epoch": 1.6919499991159674, - "grad_norm": 1.5219755172729492, - "learning_rate": 2.1800833348067214e-05, - "loss": 0.5222, - "step": 191390 - }, - { - "epoch": 1.6920384023762796, - "grad_norm": 2.376049518585205, - "learning_rate": 2.1799359960395342e-05, - "loss": 0.601, - "step": 191400 - }, - { - "epoch": 1.692126805636592, - "grad_norm": 3.1819357872009277, - "learning_rate": 2.179788657272347e-05, - "loss": 0.5259, - "step": 191410 - }, - { - "epoch": 1.6922152088969042, - "grad_norm": 4.571484088897705, - "learning_rate": 2.17964131850516e-05, - "loss": 0.5156, - "step": 191420 - }, - { - "epoch": 1.6923036121572164, - "grad_norm": 7.575584411621094, - "learning_rate": 2.179493979737973e-05, - "loss": 0.7005, - "step": 191430 - }, - { - "epoch": 1.6923920154175285, - "grad_norm": 8.56787109375, - "learning_rate": 2.179346640970786e-05, - "loss": 0.6455, - "step": 191440 - }, - { - "epoch": 1.6924804186778408, - "grad_norm": 6.546863555908203, - "learning_rate": 2.1791993022035987e-05, - "loss": 0.7042, - "step": 191450 - }, - { - "epoch": 1.6925688219381532, - "grad_norm": 1.9080413579940796, - "learning_rate": 2.179051963436412e-05, - "loss": 0.6087, - "step": 191460 - }, - { - "epoch": 1.6926572251984653, - "grad_norm": 5.976744651794434, - "learning_rate": 2.1789046246692247e-05, - "loss": 0.5607, - "step": 191470 - }, - { - "epoch": 1.6927456284587774, - "grad_norm": 1.779448390007019, - "learning_rate": 2.1787572859020376e-05, - "loss": 0.628, - "step": 191480 - }, - { - "epoch": 1.6928340317190897, - "grad_norm": 2.0550694465637207, - "learning_rate": 2.1786099471348504e-05, - "loss": 0.5087, - "step": 191490 - }, - { - "epoch": 1.692922434979402, - "grad_norm": 1.993318796157837, - "learning_rate": 2.1784626083676636e-05, - "loss": 0.6802, - "step": 191500 - }, - { - "epoch": 1.6930108382397142, - "grad_norm": 2.724390983581543, - "learning_rate": 2.1783152696004764e-05, - "loss": 0.63, - "step": 191510 - }, - { - "epoch": 1.6930992415000266, - "grad_norm": 2.7625443935394287, - "learning_rate": 2.1781679308332893e-05, - "loss": 0.4151, - "step": 191520 - }, - { - "epoch": 1.693187644760339, - "grad_norm": 5.640219688415527, - "learning_rate": 2.178020592066102e-05, - "loss": 0.5271, - "step": 191530 - }, - { - "epoch": 1.693276048020651, - "grad_norm": 2.244678258895874, - "learning_rate": 2.1778732532989153e-05, - "loss": 0.5969, - "step": 191540 - }, - { - "epoch": 1.6933644512809631, - "grad_norm": 5.617044925689697, - "learning_rate": 2.177725914531728e-05, - "loss": 0.5922, - "step": 191550 - }, - { - "epoch": 1.6934528545412755, - "grad_norm": 1.4208678007125854, - "learning_rate": 2.177578575764541e-05, - "loss": 0.5552, - "step": 191560 - }, - { - "epoch": 1.6935412578015878, - "grad_norm": 1.5686712265014648, - "learning_rate": 2.177431236997354e-05, - "loss": 0.627, - "step": 191570 - }, - { - "epoch": 1.6936296610619, - "grad_norm": 6.022036552429199, - "learning_rate": 2.177283898230167e-05, - "loss": 0.6061, - "step": 191580 - }, - { - "epoch": 1.693718064322212, - "grad_norm": 18.178495407104492, - "learning_rate": 2.1771365594629798e-05, - "loss": 0.4567, - "step": 191590 - }, - { - "epoch": 1.6938064675825244, - "grad_norm": 2.7776973247528076, - "learning_rate": 2.1769892206957926e-05, - "loss": 0.5015, - "step": 191600 - }, - { - "epoch": 1.6938948708428367, - "grad_norm": 1.8666770458221436, - "learning_rate": 2.1768418819286058e-05, - "loss": 0.6053, - "step": 191610 - }, - { - "epoch": 1.6939832741031489, - "grad_norm": 1.6332348585128784, - "learning_rate": 2.1766945431614186e-05, - "loss": 0.6221, - "step": 191620 - }, - { - "epoch": 1.6940716773634612, - "grad_norm": 3.93522572517395, - "learning_rate": 2.1765472043942315e-05, - "loss": 0.7227, - "step": 191630 - }, - { - "epoch": 1.6941600806237735, - "grad_norm": 6.13995885848999, - "learning_rate": 2.1763998656270446e-05, - "loss": 0.5609, - "step": 191640 - }, - { - "epoch": 1.6942484838840857, - "grad_norm": 3.6988348960876465, - "learning_rate": 2.1762525268598575e-05, - "loss": 0.6668, - "step": 191650 - }, - { - "epoch": 1.6943368871443978, - "grad_norm": 16.96199607849121, - "learning_rate": 2.1761051880926703e-05, - "loss": 0.5778, - "step": 191660 - }, - { - "epoch": 1.6944252904047101, - "grad_norm": 2.165583848953247, - "learning_rate": 2.175957849325483e-05, - "loss": 0.5572, - "step": 191670 - }, - { - "epoch": 1.6945136936650225, - "grad_norm": 1.700186848640442, - "learning_rate": 2.1758105105582963e-05, - "loss": 0.6116, - "step": 191680 - }, - { - "epoch": 1.6946020969253346, - "grad_norm": 5.387173175811768, - "learning_rate": 2.175663171791109e-05, - "loss": 0.5563, - "step": 191690 - }, - { - "epoch": 1.6946905001856467, - "grad_norm": 4.737339019775391, - "learning_rate": 2.175515833023922e-05, - "loss": 0.5988, - "step": 191700 - }, - { - "epoch": 1.694778903445959, - "grad_norm": 6.355472087860107, - "learning_rate": 2.1753684942567348e-05, - "loss": 0.5269, - "step": 191710 - }, - { - "epoch": 1.6948673067062714, - "grad_norm": 8.328197479248047, - "learning_rate": 2.175221155489548e-05, - "loss": 0.605, - "step": 191720 - }, - { - "epoch": 1.6949557099665835, - "grad_norm": 4.873742580413818, - "learning_rate": 2.1750738167223608e-05, - "loss": 0.5307, - "step": 191730 - }, - { - "epoch": 1.6950441132268959, - "grad_norm": 11.200762748718262, - "learning_rate": 2.1749264779551737e-05, - "loss": 0.516, - "step": 191740 - }, - { - "epoch": 1.6951325164872082, - "grad_norm": 2.6554524898529053, - "learning_rate": 2.174779139187987e-05, - "loss": 0.6569, - "step": 191750 - }, - { - "epoch": 1.6952209197475203, - "grad_norm": 2.401576280593872, - "learning_rate": 2.1746318004207997e-05, - "loss": 0.6212, - "step": 191760 - }, - { - "epoch": 1.6953093230078324, - "grad_norm": 2.919806480407715, - "learning_rate": 2.1744844616536125e-05, - "loss": 0.5842, - "step": 191770 - }, - { - "epoch": 1.6953977262681448, - "grad_norm": 1.6942613124847412, - "learning_rate": 2.1743371228864253e-05, - "loss": 0.6789, - "step": 191780 - }, - { - "epoch": 1.6954861295284571, - "grad_norm": 1.4825233221054077, - "learning_rate": 2.1741897841192385e-05, - "loss": 0.5009, - "step": 191790 - }, - { - "epoch": 1.6955745327887692, - "grad_norm": 6.986950397491455, - "learning_rate": 2.1740424453520514e-05, - "loss": 0.4956, - "step": 191800 - }, - { - "epoch": 1.6956629360490814, - "grad_norm": 0.8059583306312561, - "learning_rate": 2.1738951065848642e-05, - "loss": 0.429, - "step": 191810 - }, - { - "epoch": 1.6957513393093937, - "grad_norm": 0.841052234172821, - "learning_rate": 2.173747767817677e-05, - "loss": 0.4548, - "step": 191820 - }, - { - "epoch": 1.695839742569706, - "grad_norm": 1.0270919799804688, - "learning_rate": 2.1736004290504902e-05, - "loss": 0.5977, - "step": 191830 - }, - { - "epoch": 1.6959281458300182, - "grad_norm": 3.210310220718384, - "learning_rate": 2.173453090283303e-05, - "loss": 0.654, - "step": 191840 - }, - { - "epoch": 1.6960165490903305, - "grad_norm": 1.1407127380371094, - "learning_rate": 2.173305751516116e-05, - "loss": 0.6051, - "step": 191850 - }, - { - "epoch": 1.6961049523506428, - "grad_norm": 6.326188087463379, - "learning_rate": 2.173158412748929e-05, - "loss": 0.6217, - "step": 191860 - }, - { - "epoch": 1.696193355610955, - "grad_norm": 2.7178454399108887, - "learning_rate": 2.173011073981742e-05, - "loss": 0.4836, - "step": 191870 - }, - { - "epoch": 1.696281758871267, - "grad_norm": 0.8690058588981628, - "learning_rate": 2.1728637352145547e-05, - "loss": 0.5596, - "step": 191880 - }, - { - "epoch": 1.6963701621315794, - "grad_norm": 3.9567224979400635, - "learning_rate": 2.1727163964473675e-05, - "loss": 0.5937, - "step": 191890 - }, - { - "epoch": 1.6964585653918918, - "grad_norm": 2.895535707473755, - "learning_rate": 2.1725690576801807e-05, - "loss": 0.6673, - "step": 191900 - }, - { - "epoch": 1.696546968652204, - "grad_norm": 1.5205992460250854, - "learning_rate": 2.1724217189129936e-05, - "loss": 0.6333, - "step": 191910 - }, - { - "epoch": 1.696635371912516, - "grad_norm": 1.8309000730514526, - "learning_rate": 2.1722743801458064e-05, - "loss": 0.6837, - "step": 191920 - }, - { - "epoch": 1.6967237751728284, - "grad_norm": 10.216381072998047, - "learning_rate": 2.1721270413786196e-05, - "loss": 0.5494, - "step": 191930 - }, - { - "epoch": 1.6968121784331407, - "grad_norm": 7.729549407958984, - "learning_rate": 2.1719797026114324e-05, - "loss": 0.5908, - "step": 191940 - }, - { - "epoch": 1.6969005816934528, - "grad_norm": 8.199275970458984, - "learning_rate": 2.1718323638442452e-05, - "loss": 0.5744, - "step": 191950 - }, - { - "epoch": 1.696988984953765, - "grad_norm": 1.3720570802688599, - "learning_rate": 2.171685025077058e-05, - "loss": 0.5484, - "step": 191960 - }, - { - "epoch": 1.6970773882140775, - "grad_norm": 2.224489212036133, - "learning_rate": 2.1715376863098712e-05, - "loss": 0.5152, - "step": 191970 - }, - { - "epoch": 1.6971657914743896, - "grad_norm": 1.7162714004516602, - "learning_rate": 2.171390347542684e-05, - "loss": 0.6841, - "step": 191980 - }, - { - "epoch": 1.6972541947347017, - "grad_norm": 2.6422953605651855, - "learning_rate": 2.171243008775497e-05, - "loss": 0.5728, - "step": 191990 - }, - { - "epoch": 1.697342597995014, - "grad_norm": 7.1870198249816895, - "learning_rate": 2.1710956700083098e-05, - "loss": 0.6655, - "step": 192000 - }, - { - "epoch": 1.6974310012553264, - "grad_norm": 1.5172876119613647, - "learning_rate": 2.170948331241123e-05, - "loss": 0.5648, - "step": 192010 - }, - { - "epoch": 1.6975194045156385, - "grad_norm": 1.8165618181228638, - "learning_rate": 2.1708009924739358e-05, - "loss": 0.6453, - "step": 192020 - }, - { - "epoch": 1.6976078077759507, - "grad_norm": 5.7464470863342285, - "learning_rate": 2.1706536537067486e-05, - "loss": 0.5617, - "step": 192030 - }, - { - "epoch": 1.697696211036263, - "grad_norm": 2.307619571685791, - "learning_rate": 2.1705063149395618e-05, - "loss": 0.4716, - "step": 192040 - }, - { - "epoch": 1.6977846142965753, - "grad_norm": 7.959567070007324, - "learning_rate": 2.1703589761723746e-05, - "loss": 0.5985, - "step": 192050 - }, - { - "epoch": 1.6978730175568875, - "grad_norm": 3.922034978866577, - "learning_rate": 2.1702116374051874e-05, - "loss": 0.6511, - "step": 192060 - }, - { - "epoch": 1.6979614208171996, - "grad_norm": 4.7937726974487305, - "learning_rate": 2.1700642986380003e-05, - "loss": 0.7351, - "step": 192070 - }, - { - "epoch": 1.698049824077512, - "grad_norm": 3.758579969406128, - "learning_rate": 2.1699169598708135e-05, - "loss": 0.6756, - "step": 192080 - }, - { - "epoch": 1.6981382273378243, - "grad_norm": 1.4163635969161987, - "learning_rate": 2.1697696211036263e-05, - "loss": 0.6984, - "step": 192090 - }, - { - "epoch": 1.6982266305981364, - "grad_norm": 6.909902572631836, - "learning_rate": 2.169622282336439e-05, - "loss": 0.6313, - "step": 192100 - }, - { - "epoch": 1.6983150338584487, - "grad_norm": 2.223724126815796, - "learning_rate": 2.1694749435692523e-05, - "loss": 0.5948, - "step": 192110 - }, - { - "epoch": 1.698403437118761, - "grad_norm": 2.031708240509033, - "learning_rate": 2.169327604802065e-05, - "loss": 0.5754, - "step": 192120 - }, - { - "epoch": 1.6984918403790732, - "grad_norm": 2.6645381450653076, - "learning_rate": 2.169180266034878e-05, - "loss": 0.5845, - "step": 192130 - }, - { - "epoch": 1.6985802436393853, - "grad_norm": 1.2635366916656494, - "learning_rate": 2.1690329272676908e-05, - "loss": 0.4683, - "step": 192140 - }, - { - "epoch": 1.6986686468996977, - "grad_norm": 1.452958583831787, - "learning_rate": 2.168885588500504e-05, - "loss": 0.5489, - "step": 192150 - }, - { - "epoch": 1.69875705016001, - "grad_norm": 2.634744167327881, - "learning_rate": 2.1687382497333168e-05, - "loss": 0.6094, - "step": 192160 - }, - { - "epoch": 1.6988454534203221, - "grad_norm": 1.571746587753296, - "learning_rate": 2.1685909109661296e-05, - "loss": 0.7726, - "step": 192170 - }, - { - "epoch": 1.6989338566806342, - "grad_norm": 1.0951273441314697, - "learning_rate": 2.1684435721989428e-05, - "loss": 0.5322, - "step": 192180 - }, - { - "epoch": 1.6990222599409466, - "grad_norm": 3.656393527984619, - "learning_rate": 2.1682962334317557e-05, - "loss": 0.5525, - "step": 192190 - }, - { - "epoch": 1.699110663201259, - "grad_norm": 7.627120494842529, - "learning_rate": 2.168148894664569e-05, - "loss": 0.5073, - "step": 192200 - }, - { - "epoch": 1.699199066461571, - "grad_norm": 8.105610847473145, - "learning_rate": 2.1680015558973817e-05, - "loss": 0.4916, - "step": 192210 - }, - { - "epoch": 1.6992874697218834, - "grad_norm": 11.996027946472168, - "learning_rate": 2.1678542171301945e-05, - "loss": 0.5124, - "step": 192220 - }, - { - "epoch": 1.6993758729821957, - "grad_norm": 4.262763023376465, - "learning_rate": 2.1677068783630077e-05, - "loss": 0.5752, - "step": 192230 - }, - { - "epoch": 1.6994642762425078, - "grad_norm": 2.279161214828491, - "learning_rate": 2.1675595395958205e-05, - "loss": 0.6714, - "step": 192240 - }, - { - "epoch": 1.69955267950282, - "grad_norm": 2.2455124855041504, - "learning_rate": 2.1674122008286333e-05, - "loss": 0.5061, - "step": 192250 - }, - { - "epoch": 1.6996410827631323, - "grad_norm": 3.2316677570343018, - "learning_rate": 2.1672648620614465e-05, - "loss": 0.4982, - "step": 192260 - }, - { - "epoch": 1.6997294860234446, - "grad_norm": 1.562880039215088, - "learning_rate": 2.1671175232942594e-05, - "loss": 0.7062, - "step": 192270 - }, - { - "epoch": 1.6998178892837568, - "grad_norm": 13.20670223236084, - "learning_rate": 2.1669701845270722e-05, - "loss": 0.5343, - "step": 192280 - }, - { - "epoch": 1.6999062925440689, - "grad_norm": 1.3940116167068481, - "learning_rate": 2.166822845759885e-05, - "loss": 0.574, - "step": 192290 - }, - { - "epoch": 1.6999946958043812, - "grad_norm": 2.756795644760132, - "learning_rate": 2.1666755069926982e-05, - "loss": 0.6782, - "step": 192300 - }, - { - "epoch": 1.7000830990646936, - "grad_norm": 2.0956616401672363, - "learning_rate": 2.166528168225511e-05, - "loss": 0.7429, - "step": 192310 - }, - { - "epoch": 1.7001715023250057, - "grad_norm": 4.234799385070801, - "learning_rate": 2.166380829458324e-05, - "loss": 0.6413, - "step": 192320 - }, - { - "epoch": 1.700259905585318, - "grad_norm": 3.032193183898926, - "learning_rate": 2.166233490691137e-05, - "loss": 0.6566, - "step": 192330 - }, - { - "epoch": 1.7003483088456304, - "grad_norm": 1.0524382591247559, - "learning_rate": 2.16608615192395e-05, - "loss": 0.5907, - "step": 192340 - }, - { - "epoch": 1.7004367121059425, - "grad_norm": 1.5335265398025513, - "learning_rate": 2.1659388131567627e-05, - "loss": 0.487, - "step": 192350 - }, - { - "epoch": 1.7005251153662546, - "grad_norm": 1.9411295652389526, - "learning_rate": 2.1657914743895756e-05, - "loss": 0.5349, - "step": 192360 - }, - { - "epoch": 1.700613518626567, - "grad_norm": 3.3382508754730225, - "learning_rate": 2.1656441356223887e-05, - "loss": 0.6519, - "step": 192370 - }, - { - "epoch": 1.7007019218868793, - "grad_norm": 2.6799464225769043, - "learning_rate": 2.1654967968552016e-05, - "loss": 0.5604, - "step": 192380 - }, - { - "epoch": 1.7007903251471914, - "grad_norm": 5.3011322021484375, - "learning_rate": 2.1653494580880144e-05, - "loss": 0.8506, - "step": 192390 - }, - { - "epoch": 1.7008787284075035, - "grad_norm": 7.716976165771484, - "learning_rate": 2.1652021193208276e-05, - "loss": 0.5934, - "step": 192400 - }, - { - "epoch": 1.7009671316678159, - "grad_norm": 1.4680860042572021, - "learning_rate": 2.1650547805536404e-05, - "loss": 0.6041, - "step": 192410 - }, - { - "epoch": 1.7010555349281282, - "grad_norm": 9.352996826171875, - "learning_rate": 2.1649074417864532e-05, - "loss": 0.6027, - "step": 192420 - }, - { - "epoch": 1.7011439381884403, - "grad_norm": 2.039008378982544, - "learning_rate": 2.164760103019266e-05, - "loss": 0.6861, - "step": 192430 - }, - { - "epoch": 1.7012323414487527, - "grad_norm": 3.9179604053497314, - "learning_rate": 2.1646127642520793e-05, - "loss": 0.7188, - "step": 192440 - }, - { - "epoch": 1.701320744709065, - "grad_norm": 5.1725029945373535, - "learning_rate": 2.164465425484892e-05, - "loss": 0.5822, - "step": 192450 - }, - { - "epoch": 1.7014091479693771, - "grad_norm": 2.0126705169677734, - "learning_rate": 2.164318086717705e-05, - "loss": 0.5184, - "step": 192460 - }, - { - "epoch": 1.7014975512296893, - "grad_norm": 2.5465290546417236, - "learning_rate": 2.1641707479505178e-05, - "loss": 0.5563, - "step": 192470 - }, - { - "epoch": 1.7015859544900016, - "grad_norm": 2.2157347202301025, - "learning_rate": 2.164023409183331e-05, - "loss": 0.6306, - "step": 192480 - }, - { - "epoch": 1.701674357750314, - "grad_norm": 1.3446446657180786, - "learning_rate": 2.1638760704161438e-05, - "loss": 0.6451, - "step": 192490 - }, - { - "epoch": 1.701762761010626, - "grad_norm": 3.229628086090088, - "learning_rate": 2.1637287316489566e-05, - "loss": 0.7066, - "step": 192500 - }, - { - "epoch": 1.7018511642709382, - "grad_norm": 1.7282774448394775, - "learning_rate": 2.1635813928817698e-05, - "loss": 0.5993, - "step": 192510 - }, - { - "epoch": 1.7019395675312505, - "grad_norm": 4.4794535636901855, - "learning_rate": 2.1634340541145826e-05, - "loss": 0.6174, - "step": 192520 - }, - { - "epoch": 1.7020279707915629, - "grad_norm": 2.4877657890319824, - "learning_rate": 2.1632867153473955e-05, - "loss": 0.5814, - "step": 192530 - }, - { - "epoch": 1.702116374051875, - "grad_norm": 2.519657850265503, - "learning_rate": 2.1631393765802083e-05, - "loss": 0.5948, - "step": 192540 - }, - { - "epoch": 1.7022047773121871, - "grad_norm": 1.9545468091964722, - "learning_rate": 2.1629920378130215e-05, - "loss": 0.6662, - "step": 192550 - }, - { - "epoch": 1.7022931805724997, - "grad_norm": 1.0247230529785156, - "learning_rate": 2.1628446990458343e-05, - "loss": 0.5043, - "step": 192560 - }, - { - "epoch": 1.7023815838328118, - "grad_norm": 1.5919438600540161, - "learning_rate": 2.162697360278647e-05, - "loss": 0.5179, - "step": 192570 - }, - { - "epoch": 1.702469987093124, - "grad_norm": 1.2989373207092285, - "learning_rate": 2.1625500215114603e-05, - "loss": 0.6397, - "step": 192580 - }, - { - "epoch": 1.7025583903534363, - "grad_norm": 1.4600801467895508, - "learning_rate": 2.162402682744273e-05, - "loss": 0.581, - "step": 192590 - }, - { - "epoch": 1.7026467936137486, - "grad_norm": 9.107261657714844, - "learning_rate": 2.162255343977086e-05, - "loss": 0.6276, - "step": 192600 - }, - { - "epoch": 1.7027351968740607, - "grad_norm": 1.6058019399642944, - "learning_rate": 2.1621080052098988e-05, - "loss": 0.6251, - "step": 192610 - }, - { - "epoch": 1.7028236001343728, - "grad_norm": 1.7118215560913086, - "learning_rate": 2.161960666442712e-05, - "loss": 0.5064, - "step": 192620 - }, - { - "epoch": 1.7029120033946852, - "grad_norm": 16.251148223876953, - "learning_rate": 2.1618133276755248e-05, - "loss": 0.6524, - "step": 192630 - }, - { - "epoch": 1.7030004066549975, - "grad_norm": 3.134155511856079, - "learning_rate": 2.1616659889083377e-05, - "loss": 0.5798, - "step": 192640 - }, - { - "epoch": 1.7030888099153096, - "grad_norm": 2.934706449508667, - "learning_rate": 2.1615186501411505e-05, - "loss": 0.6212, - "step": 192650 - }, - { - "epoch": 1.7031772131756218, - "grad_norm": 1.3024001121520996, - "learning_rate": 2.1613713113739637e-05, - "loss": 0.5679, - "step": 192660 - }, - { - "epoch": 1.703265616435934, - "grad_norm": 4.700631618499756, - "learning_rate": 2.1612239726067765e-05, - "loss": 0.5954, - "step": 192670 - }, - { - "epoch": 1.7033540196962464, - "grad_norm": 2.8760063648223877, - "learning_rate": 2.1610766338395893e-05, - "loss": 0.6942, - "step": 192680 - }, - { - "epoch": 1.7034424229565586, - "grad_norm": 1.6820541620254517, - "learning_rate": 2.1609292950724025e-05, - "loss": 0.782, - "step": 192690 - }, - { - "epoch": 1.703530826216871, - "grad_norm": 1.9463380575180054, - "learning_rate": 2.1607819563052153e-05, - "loss": 0.6404, - "step": 192700 - }, - { - "epoch": 1.7036192294771833, - "grad_norm": 1.1862205266952515, - "learning_rate": 2.1606346175380282e-05, - "loss": 0.5103, - "step": 192710 - }, - { - "epoch": 1.7037076327374954, - "grad_norm": 4.426752090454102, - "learning_rate": 2.160487278770841e-05, - "loss": 0.5889, - "step": 192720 - }, - { - "epoch": 1.7037960359978075, - "grad_norm": 0.9702103734016418, - "learning_rate": 2.1603399400036542e-05, - "loss": 0.607, - "step": 192730 - }, - { - "epoch": 1.7038844392581198, - "grad_norm": 22.196788787841797, - "learning_rate": 2.160192601236467e-05, - "loss": 0.8177, - "step": 192740 - }, - { - "epoch": 1.7039728425184322, - "grad_norm": 9.930220603942871, - "learning_rate": 2.16004526246928e-05, - "loss": 0.5192, - "step": 192750 - }, - { - "epoch": 1.7040612457787443, - "grad_norm": 2.2557296752929688, - "learning_rate": 2.159897923702093e-05, - "loss": 0.6364, - "step": 192760 - }, - { - "epoch": 1.7041496490390564, - "grad_norm": 3.7410945892333984, - "learning_rate": 2.159750584934906e-05, - "loss": 0.6099, - "step": 192770 - }, - { - "epoch": 1.7042380522993688, - "grad_norm": 2.8992271423339844, - "learning_rate": 2.1596032461677187e-05, - "loss": 0.5525, - "step": 192780 - }, - { - "epoch": 1.704326455559681, - "grad_norm": 2.923013210296631, - "learning_rate": 2.1594559074005315e-05, - "loss": 0.6137, - "step": 192790 - }, - { - "epoch": 1.7044148588199932, - "grad_norm": 7.2937726974487305, - "learning_rate": 2.1593085686333447e-05, - "loss": 0.7636, - "step": 192800 - }, - { - "epoch": 1.7045032620803056, - "grad_norm": 1.2805955410003662, - "learning_rate": 2.1591612298661576e-05, - "loss": 0.6087, - "step": 192810 - }, - { - "epoch": 1.704591665340618, - "grad_norm": 1.9098401069641113, - "learning_rate": 2.1590138910989704e-05, - "loss": 0.5969, - "step": 192820 - }, - { - "epoch": 1.70468006860093, - "grad_norm": 1.8037567138671875, - "learning_rate": 2.1588665523317832e-05, - "loss": 0.6318, - "step": 192830 - }, - { - "epoch": 1.7047684718612421, - "grad_norm": 3.3513031005859375, - "learning_rate": 2.1587192135645964e-05, - "loss": 0.5824, - "step": 192840 - }, - { - "epoch": 1.7048568751215545, - "grad_norm": 2.465480327606201, - "learning_rate": 2.1585718747974092e-05, - "loss": 0.606, - "step": 192850 - }, - { - "epoch": 1.7049452783818668, - "grad_norm": 7.786761283874512, - "learning_rate": 2.158424536030222e-05, - "loss": 0.5148, - "step": 192860 - }, - { - "epoch": 1.705033681642179, - "grad_norm": 1.2157894372940063, - "learning_rate": 2.1582771972630352e-05, - "loss": 0.5365, - "step": 192870 - }, - { - "epoch": 1.705122084902491, - "grad_norm": 10.837203025817871, - "learning_rate": 2.158129858495848e-05, - "loss": 0.5598, - "step": 192880 - }, - { - "epoch": 1.7052104881628034, - "grad_norm": 8.102943420410156, - "learning_rate": 2.157982519728661e-05, - "loss": 0.6252, - "step": 192890 - }, - { - "epoch": 1.7052988914231157, - "grad_norm": 1.599469542503357, - "learning_rate": 2.1578351809614737e-05, - "loss": 0.5073, - "step": 192900 - }, - { - "epoch": 1.7053872946834279, - "grad_norm": 1.0777111053466797, - "learning_rate": 2.157687842194287e-05, - "loss": 0.6193, - "step": 192910 - }, - { - "epoch": 1.7054756979437402, - "grad_norm": 1.2110395431518555, - "learning_rate": 2.1575405034270998e-05, - "loss": 0.6007, - "step": 192920 - }, - { - "epoch": 1.7055641012040526, - "grad_norm": 2.502366065979004, - "learning_rate": 2.1573931646599126e-05, - "loss": 0.5691, - "step": 192930 - }, - { - "epoch": 1.7056525044643647, - "grad_norm": 5.271129131317139, - "learning_rate": 2.1572458258927254e-05, - "loss": 0.5525, - "step": 192940 - }, - { - "epoch": 1.7057409077246768, - "grad_norm": 1.9179210662841797, - "learning_rate": 2.1570984871255386e-05, - "loss": 0.5809, - "step": 192950 - }, - { - "epoch": 1.7058293109849891, - "grad_norm": 2.8024744987487793, - "learning_rate": 2.1569511483583514e-05, - "loss": 0.6545, - "step": 192960 - }, - { - "epoch": 1.7059177142453015, - "grad_norm": 1.679518699645996, - "learning_rate": 2.1568038095911643e-05, - "loss": 0.6099, - "step": 192970 - }, - { - "epoch": 1.7060061175056136, - "grad_norm": 2.3610336780548096, - "learning_rate": 2.1566564708239774e-05, - "loss": 0.6501, - "step": 192980 - }, - { - "epoch": 1.7060945207659257, - "grad_norm": 2.0149097442626953, - "learning_rate": 2.1565091320567903e-05, - "loss": 0.6572, - "step": 192990 - }, - { - "epoch": 1.706182924026238, - "grad_norm": 1.6732184886932373, - "learning_rate": 2.156361793289603e-05, - "loss": 0.5489, - "step": 193000 - }, - { - "epoch": 1.7062713272865504, - "grad_norm": 8.428173065185547, - "learning_rate": 2.156214454522416e-05, - "loss": 0.5129, - "step": 193010 - }, - { - "epoch": 1.7063597305468625, - "grad_norm": 13.745948791503906, - "learning_rate": 2.156067115755229e-05, - "loss": 0.5867, - "step": 193020 - }, - { - "epoch": 1.7064481338071749, - "grad_norm": 4.421304225921631, - "learning_rate": 2.155919776988042e-05, - "loss": 0.4689, - "step": 193030 - }, - { - "epoch": 1.7065365370674872, - "grad_norm": 1.6220214366912842, - "learning_rate": 2.1557724382208548e-05, - "loss": 0.6558, - "step": 193040 - }, - { - "epoch": 1.7066249403277993, - "grad_norm": 1.6485824584960938, - "learning_rate": 2.155625099453668e-05, - "loss": 0.5118, - "step": 193050 - }, - { - "epoch": 1.7067133435881114, - "grad_norm": 2.4856295585632324, - "learning_rate": 2.1554777606864808e-05, - "loss": 0.7231, - "step": 193060 - }, - { - "epoch": 1.7068017468484238, - "grad_norm": 1.8700010776519775, - "learning_rate": 2.1553304219192936e-05, - "loss": 0.5572, - "step": 193070 - }, - { - "epoch": 1.7068901501087361, - "grad_norm": 2.4165444374084473, - "learning_rate": 2.1551830831521068e-05, - "loss": 0.6112, - "step": 193080 - }, - { - "epoch": 1.7069785533690482, - "grad_norm": 1.8147107362747192, - "learning_rate": 2.1550357443849197e-05, - "loss": 0.6198, - "step": 193090 - }, - { - "epoch": 1.7070669566293604, - "grad_norm": 1.855204463005066, - "learning_rate": 2.1548884056177325e-05, - "loss": 0.5507, - "step": 193100 - }, - { - "epoch": 1.7071553598896727, - "grad_norm": 4.025112628936768, - "learning_rate": 2.1547410668505457e-05, - "loss": 0.5764, - "step": 193110 - }, - { - "epoch": 1.707243763149985, - "grad_norm": 21.12237548828125, - "learning_rate": 2.1545937280833585e-05, - "loss": 0.7226, - "step": 193120 - }, - { - "epoch": 1.7073321664102972, - "grad_norm": 1.2159252166748047, - "learning_rate": 2.1544463893161713e-05, - "loss": 0.4942, - "step": 193130 - }, - { - "epoch": 1.7074205696706093, - "grad_norm": 2.432981491088867, - "learning_rate": 2.1542990505489845e-05, - "loss": 0.5872, - "step": 193140 - }, - { - "epoch": 1.7075089729309219, - "grad_norm": 1.2240477800369263, - "learning_rate": 2.1541517117817973e-05, - "loss": 0.516, - "step": 193150 - }, - { - "epoch": 1.707597376191234, - "grad_norm": 8.193679809570312, - "learning_rate": 2.1540043730146102e-05, - "loss": 0.6525, - "step": 193160 - }, - { - "epoch": 1.707685779451546, - "grad_norm": 1.132754921913147, - "learning_rate": 2.1538570342474234e-05, - "loss": 0.4889, - "step": 193170 - }, - { - "epoch": 1.7077741827118584, - "grad_norm": 2.784849166870117, - "learning_rate": 2.1537096954802362e-05, - "loss": 0.4858, - "step": 193180 - }, - { - "epoch": 1.7078625859721708, - "grad_norm": 3.904958486557007, - "learning_rate": 2.153562356713049e-05, - "loss": 0.5982, - "step": 193190 - }, - { - "epoch": 1.707950989232483, - "grad_norm": 0.7412169575691223, - "learning_rate": 2.1534150179458622e-05, - "loss": 0.5165, - "step": 193200 - }, - { - "epoch": 1.708039392492795, - "grad_norm": 1.7279950380325317, - "learning_rate": 2.153267679178675e-05, - "loss": 0.5379, - "step": 193210 - }, - { - "epoch": 1.7081277957531074, - "grad_norm": 1.994883418083191, - "learning_rate": 2.153120340411488e-05, - "loss": 0.7022, - "step": 193220 - }, - { - "epoch": 1.7082161990134197, - "grad_norm": 10.715351104736328, - "learning_rate": 2.152973001644301e-05, - "loss": 0.5129, - "step": 193230 - }, - { - "epoch": 1.7083046022737318, - "grad_norm": 2.887328624725342, - "learning_rate": 2.152825662877114e-05, - "loss": 0.555, - "step": 193240 - }, - { - "epoch": 1.708393005534044, - "grad_norm": 2.088953733444214, - "learning_rate": 2.1526783241099267e-05, - "loss": 0.5347, - "step": 193250 - }, - { - "epoch": 1.7084814087943563, - "grad_norm": 1.7319414615631104, - "learning_rate": 2.1525309853427395e-05, - "loss": 0.4997, - "step": 193260 - }, - { - "epoch": 1.7085698120546686, - "grad_norm": 2.9857470989227295, - "learning_rate": 2.1523836465755527e-05, - "loss": 0.7082, - "step": 193270 - }, - { - "epoch": 1.7086582153149807, - "grad_norm": 2.421107053756714, - "learning_rate": 2.1522363078083656e-05, - "loss": 0.6149, - "step": 193280 - }, - { - "epoch": 1.708746618575293, - "grad_norm": 4.203795433044434, - "learning_rate": 2.1520889690411784e-05, - "loss": 0.422, - "step": 193290 - }, - { - "epoch": 1.7088350218356054, - "grad_norm": 2.44531512260437, - "learning_rate": 2.1519416302739912e-05, - "loss": 0.6336, - "step": 193300 - }, - { - "epoch": 1.7089234250959175, - "grad_norm": 0.8148242235183716, - "learning_rate": 2.1517942915068044e-05, - "loss": 0.5725, - "step": 193310 - }, - { - "epoch": 1.7090118283562297, - "grad_norm": 2.558438777923584, - "learning_rate": 2.1516469527396172e-05, - "loss": 0.7034, - "step": 193320 - }, - { - "epoch": 1.709100231616542, - "grad_norm": 1.6167006492614746, - "learning_rate": 2.15149961397243e-05, - "loss": 0.6927, - "step": 193330 - }, - { - "epoch": 1.7091886348768544, - "grad_norm": 3.192253589630127, - "learning_rate": 2.1513522752052432e-05, - "loss": 0.4504, - "step": 193340 - }, - { - "epoch": 1.7092770381371665, - "grad_norm": 2.2594075202941895, - "learning_rate": 2.151204936438056e-05, - "loss": 0.6758, - "step": 193350 - }, - { - "epoch": 1.7093654413974786, - "grad_norm": 1.3042347431182861, - "learning_rate": 2.151057597670869e-05, - "loss": 0.7211, - "step": 193360 - }, - { - "epoch": 1.709453844657791, - "grad_norm": 8.780888557434082, - "learning_rate": 2.1509102589036818e-05, - "loss": 0.6545, - "step": 193370 - }, - { - "epoch": 1.7095422479181033, - "grad_norm": 2.702665090560913, - "learning_rate": 2.150762920136495e-05, - "loss": 0.7298, - "step": 193380 - }, - { - "epoch": 1.7096306511784154, - "grad_norm": 5.596799850463867, - "learning_rate": 2.1506155813693078e-05, - "loss": 0.5567, - "step": 193390 - }, - { - "epoch": 1.7097190544387277, - "grad_norm": 5.592110633850098, - "learning_rate": 2.1504682426021206e-05, - "loss": 0.5826, - "step": 193400 - }, - { - "epoch": 1.70980745769904, - "grad_norm": 6.443530559539795, - "learning_rate": 2.1503209038349334e-05, - "loss": 0.602, - "step": 193410 - }, - { - "epoch": 1.7098958609593522, - "grad_norm": 2.6245791912078857, - "learning_rate": 2.1501735650677466e-05, - "loss": 0.5969, - "step": 193420 - }, - { - "epoch": 1.7099842642196643, - "grad_norm": 1.3029557466506958, - "learning_rate": 2.1500262263005594e-05, - "loss": 0.6485, - "step": 193430 - }, - { - "epoch": 1.7100726674799767, - "grad_norm": 4.779838562011719, - "learning_rate": 2.1498788875333723e-05, - "loss": 0.6585, - "step": 193440 - }, - { - "epoch": 1.710161070740289, - "grad_norm": 3.536142349243164, - "learning_rate": 2.1497315487661855e-05, - "loss": 0.6322, - "step": 193450 - }, - { - "epoch": 1.7102494740006011, - "grad_norm": 2.38405704498291, - "learning_rate": 2.1495842099989983e-05, - "loss": 0.5583, - "step": 193460 - }, - { - "epoch": 1.7103378772609132, - "grad_norm": 2.292497396469116, - "learning_rate": 2.149436871231811e-05, - "loss": 0.5467, - "step": 193470 - }, - { - "epoch": 1.7104262805212256, - "grad_norm": 3.9541614055633545, - "learning_rate": 2.149289532464624e-05, - "loss": 0.7388, - "step": 193480 - }, - { - "epoch": 1.710514683781538, - "grad_norm": 12.266168594360352, - "learning_rate": 2.149142193697437e-05, - "loss": 0.5763, - "step": 193490 - }, - { - "epoch": 1.71060308704185, - "grad_norm": 1.5108264684677124, - "learning_rate": 2.14899485493025e-05, - "loss": 0.6221, - "step": 193500 - }, - { - "epoch": 1.7106914903021624, - "grad_norm": 2.0176055431365967, - "learning_rate": 2.1488475161630628e-05, - "loss": 0.6865, - "step": 193510 - }, - { - "epoch": 1.7107798935624747, - "grad_norm": 5.445321083068848, - "learning_rate": 2.148700177395876e-05, - "loss": 0.4985, - "step": 193520 - }, - { - "epoch": 1.7108682968227868, - "grad_norm": 1.921905755996704, - "learning_rate": 2.1485528386286888e-05, - "loss": 0.7662, - "step": 193530 - }, - { - "epoch": 1.710956700083099, - "grad_norm": 3.3987882137298584, - "learning_rate": 2.1484054998615017e-05, - "loss": 0.5557, - "step": 193540 - }, - { - "epoch": 1.7110451033434113, - "grad_norm": 2.8329806327819824, - "learning_rate": 2.1482581610943145e-05, - "loss": 0.5807, - "step": 193550 - }, - { - "epoch": 1.7111335066037237, - "grad_norm": 2.9834933280944824, - "learning_rate": 2.1481108223271277e-05, - "loss": 0.6431, - "step": 193560 - }, - { - "epoch": 1.7112219098640358, - "grad_norm": 3.6295862197875977, - "learning_rate": 2.1479634835599405e-05, - "loss": 0.7418, - "step": 193570 - }, - { - "epoch": 1.711310313124348, - "grad_norm": 4.6869025230407715, - "learning_rate": 2.1478161447927533e-05, - "loss": 0.5437, - "step": 193580 - }, - { - "epoch": 1.7113987163846602, - "grad_norm": 2.265542507171631, - "learning_rate": 2.147668806025566e-05, - "loss": 0.5905, - "step": 193590 - }, - { - "epoch": 1.7114871196449726, - "grad_norm": 5.112244129180908, - "learning_rate": 2.1475214672583793e-05, - "loss": 0.5551, - "step": 193600 - }, - { - "epoch": 1.7115755229052847, - "grad_norm": 3.033071756362915, - "learning_rate": 2.1473741284911922e-05, - "loss": 0.6883, - "step": 193610 - }, - { - "epoch": 1.711663926165597, - "grad_norm": 1.3255032300949097, - "learning_rate": 2.147226789724005e-05, - "loss": 0.7637, - "step": 193620 - }, - { - "epoch": 1.7117523294259094, - "grad_norm": 1.6674469709396362, - "learning_rate": 2.1470794509568182e-05, - "loss": 0.6135, - "step": 193630 - }, - { - "epoch": 1.7118407326862215, - "grad_norm": 1.3110415935516357, - "learning_rate": 2.146932112189631e-05, - "loss": 0.602, - "step": 193640 - }, - { - "epoch": 1.7119291359465336, - "grad_norm": 2.6006574630737305, - "learning_rate": 2.146784773422444e-05, - "loss": 0.6407, - "step": 193650 - }, - { - "epoch": 1.712017539206846, - "grad_norm": 1.43293297290802, - "learning_rate": 2.1466374346552567e-05, - "loss": 0.5702, - "step": 193660 - }, - { - "epoch": 1.7121059424671583, - "grad_norm": 11.81348991394043, - "learning_rate": 2.14649009588807e-05, - "loss": 0.6085, - "step": 193670 - }, - { - "epoch": 1.7121943457274704, - "grad_norm": 1.8563233613967896, - "learning_rate": 2.1463427571208827e-05, - "loss": 0.5778, - "step": 193680 - }, - { - "epoch": 1.7122827489877825, - "grad_norm": 1.7744039297103882, - "learning_rate": 2.1461954183536955e-05, - "loss": 0.4811, - "step": 193690 - }, - { - "epoch": 1.7123711522480949, - "grad_norm": 1.2777608633041382, - "learning_rate": 2.1460480795865087e-05, - "loss": 0.5048, - "step": 193700 - }, - { - "epoch": 1.7124595555084072, - "grad_norm": 2.096623659133911, - "learning_rate": 2.1459007408193215e-05, - "loss": 0.6469, - "step": 193710 - }, - { - "epoch": 1.7125479587687193, - "grad_norm": 2.577944755554199, - "learning_rate": 2.1457534020521344e-05, - "loss": 0.6443, - "step": 193720 - }, - { - "epoch": 1.7126363620290315, - "grad_norm": 7.665952205657959, - "learning_rate": 2.1456060632849472e-05, - "loss": 0.6474, - "step": 193730 - }, - { - "epoch": 1.712724765289344, - "grad_norm": 2.8538498878479004, - "learning_rate": 2.1454587245177604e-05, - "loss": 0.6347, - "step": 193740 - }, - { - "epoch": 1.7128131685496562, - "grad_norm": 1.5761281251907349, - "learning_rate": 2.1453113857505732e-05, - "loss": 0.4585, - "step": 193750 - }, - { - "epoch": 1.7129015718099683, - "grad_norm": 9.394773483276367, - "learning_rate": 2.145164046983386e-05, - "loss": 0.5066, - "step": 193760 - }, - { - "epoch": 1.7129899750702806, - "grad_norm": 1.5645577907562256, - "learning_rate": 2.145016708216199e-05, - "loss": 0.5753, - "step": 193770 - }, - { - "epoch": 1.713078378330593, - "grad_norm": 4.080010414123535, - "learning_rate": 2.144869369449012e-05, - "loss": 0.5519, - "step": 193780 - }, - { - "epoch": 1.713166781590905, - "grad_norm": 6.62410306930542, - "learning_rate": 2.144722030681825e-05, - "loss": 0.6189, - "step": 193790 - }, - { - "epoch": 1.7132551848512172, - "grad_norm": 9.186639785766602, - "learning_rate": 2.1445746919146377e-05, - "loss": 0.5834, - "step": 193800 - }, - { - "epoch": 1.7133435881115295, - "grad_norm": 8.02290153503418, - "learning_rate": 2.144427353147451e-05, - "loss": 0.4295, - "step": 193810 - }, - { - "epoch": 1.7134319913718419, - "grad_norm": 2.2746243476867676, - "learning_rate": 2.1442800143802638e-05, - "loss": 0.5328, - "step": 193820 - }, - { - "epoch": 1.713520394632154, - "grad_norm": 1.81694757938385, - "learning_rate": 2.1441326756130766e-05, - "loss": 0.6152, - "step": 193830 - }, - { - "epoch": 1.7136087978924661, - "grad_norm": 10.199980735778809, - "learning_rate": 2.1439853368458894e-05, - "loss": 0.5066, - "step": 193840 - }, - { - "epoch": 1.7136972011527785, - "grad_norm": 1.051020622253418, - "learning_rate": 2.1438379980787026e-05, - "loss": 0.6798, - "step": 193850 - }, - { - "epoch": 1.7137856044130908, - "grad_norm": 7.369716644287109, - "learning_rate": 2.1436906593115154e-05, - "loss": 0.611, - "step": 193860 - }, - { - "epoch": 1.713874007673403, - "grad_norm": 3.89119553565979, - "learning_rate": 2.1435433205443283e-05, - "loss": 0.4936, - "step": 193870 - }, - { - "epoch": 1.7139624109337153, - "grad_norm": 2.684659004211426, - "learning_rate": 2.1433959817771414e-05, - "loss": 0.5614, - "step": 193880 - }, - { - "epoch": 1.7140508141940276, - "grad_norm": 1.016061782836914, - "learning_rate": 2.1432486430099543e-05, - "loss": 0.552, - "step": 193890 - }, - { - "epoch": 1.7141392174543397, - "grad_norm": 4.650036334991455, - "learning_rate": 2.143101304242767e-05, - "loss": 0.6651, - "step": 193900 - }, - { - "epoch": 1.7142276207146518, - "grad_norm": 3.340902328491211, - "learning_rate": 2.14295396547558e-05, - "loss": 0.5723, - "step": 193910 - }, - { - "epoch": 1.7143160239749642, - "grad_norm": 2.3471693992614746, - "learning_rate": 2.142806626708393e-05, - "loss": 0.6426, - "step": 193920 - }, - { - "epoch": 1.7144044272352765, - "grad_norm": 2.278501033782959, - "learning_rate": 2.142659287941206e-05, - "loss": 0.5446, - "step": 193930 - }, - { - "epoch": 1.7144928304955886, - "grad_norm": 0.9082626700401306, - "learning_rate": 2.1425119491740188e-05, - "loss": 0.5715, - "step": 193940 - }, - { - "epoch": 1.7145812337559008, - "grad_norm": 2.4361538887023926, - "learning_rate": 2.1423646104068316e-05, - "loss": 0.4883, - "step": 193950 - }, - { - "epoch": 1.714669637016213, - "grad_norm": 2.2103195190429688, - "learning_rate": 2.1422172716396448e-05, - "loss": 0.7856, - "step": 193960 - }, - { - "epoch": 1.7147580402765255, - "grad_norm": 1.3517742156982422, - "learning_rate": 2.1420699328724576e-05, - "loss": 0.6893, - "step": 193970 - }, - { - "epoch": 1.7148464435368376, - "grad_norm": 1.9715032577514648, - "learning_rate": 2.1419225941052705e-05, - "loss": 0.6235, - "step": 193980 - }, - { - "epoch": 1.71493484679715, - "grad_norm": 3.0068297386169434, - "learning_rate": 2.1417752553380836e-05, - "loss": 0.6845, - "step": 193990 - }, - { - "epoch": 1.7150232500574623, - "grad_norm": 4.430449962615967, - "learning_rate": 2.1416279165708965e-05, - "loss": 0.4986, - "step": 194000 - }, - { - "epoch": 1.7151116533177744, - "grad_norm": 1.213348150253296, - "learning_rate": 2.1414805778037093e-05, - "loss": 0.741, - "step": 194010 - }, - { - "epoch": 1.7152000565780865, - "grad_norm": 1.1817817687988281, - "learning_rate": 2.1413332390365225e-05, - "loss": 0.4849, - "step": 194020 - }, - { - "epoch": 1.7152884598383988, - "grad_norm": 8.518665313720703, - "learning_rate": 2.1411859002693353e-05, - "loss": 0.5765, - "step": 194030 - }, - { - "epoch": 1.7153768630987112, - "grad_norm": 7.211263656616211, - "learning_rate": 2.141038561502148e-05, - "loss": 0.6467, - "step": 194040 - }, - { - "epoch": 1.7154652663590233, - "grad_norm": 5.086142539978027, - "learning_rate": 2.1408912227349613e-05, - "loss": 0.5901, - "step": 194050 - }, - { - "epoch": 1.7155536696193354, - "grad_norm": 1.3066221475601196, - "learning_rate": 2.1407438839677742e-05, - "loss": 0.5527, - "step": 194060 - }, - { - "epoch": 1.7156420728796478, - "grad_norm": 5.550687313079834, - "learning_rate": 2.140596545200587e-05, - "loss": 0.6424, - "step": 194070 - }, - { - "epoch": 1.71573047613996, - "grad_norm": 2.44869327545166, - "learning_rate": 2.1404492064334002e-05, - "loss": 0.535, - "step": 194080 - }, - { - "epoch": 1.7158188794002722, - "grad_norm": 1.9060211181640625, - "learning_rate": 2.140301867666213e-05, - "loss": 0.6116, - "step": 194090 - }, - { - "epoch": 1.7159072826605846, - "grad_norm": 4.8658952713012695, - "learning_rate": 2.140154528899026e-05, - "loss": 0.6883, - "step": 194100 - }, - { - "epoch": 1.715995685920897, - "grad_norm": 1.91483473777771, - "learning_rate": 2.140007190131839e-05, - "loss": 0.6174, - "step": 194110 - }, - { - "epoch": 1.716084089181209, - "grad_norm": 6.037035942077637, - "learning_rate": 2.139859851364652e-05, - "loss": 0.6025, - "step": 194120 - }, - { - "epoch": 1.7161724924415211, - "grad_norm": 7.2498779296875, - "learning_rate": 2.1397125125974647e-05, - "loss": 0.6472, - "step": 194130 - }, - { - "epoch": 1.7162608957018335, - "grad_norm": 7.297693252563477, - "learning_rate": 2.139565173830278e-05, - "loss": 0.5235, - "step": 194140 - }, - { - "epoch": 1.7163492989621458, - "grad_norm": 4.959930896759033, - "learning_rate": 2.1394178350630907e-05, - "loss": 0.6185, - "step": 194150 - }, - { - "epoch": 1.716437702222458, - "grad_norm": 1.1903307437896729, - "learning_rate": 2.1392704962959035e-05, - "loss": 0.5407, - "step": 194160 - }, - { - "epoch": 1.71652610548277, - "grad_norm": 2.4141032695770264, - "learning_rate": 2.1391231575287167e-05, - "loss": 0.7523, - "step": 194170 - }, - { - "epoch": 1.7166145087430824, - "grad_norm": 3.3654158115386963, - "learning_rate": 2.1389758187615296e-05, - "loss": 0.7131, - "step": 194180 - }, - { - "epoch": 1.7167029120033948, - "grad_norm": 5.278143882751465, - "learning_rate": 2.1388284799943424e-05, - "loss": 0.5624, - "step": 194190 - }, - { - "epoch": 1.7167913152637069, - "grad_norm": 1.1827096939086914, - "learning_rate": 2.1386811412271552e-05, - "loss": 0.7165, - "step": 194200 - }, - { - "epoch": 1.7168797185240192, - "grad_norm": 1.5173828601837158, - "learning_rate": 2.1385338024599684e-05, - "loss": 0.6331, - "step": 194210 - }, - { - "epoch": 1.7169681217843316, - "grad_norm": 1.3472874164581299, - "learning_rate": 2.1383864636927812e-05, - "loss": 0.5395, - "step": 194220 - }, - { - "epoch": 1.7170565250446437, - "grad_norm": 1.7874771356582642, - "learning_rate": 2.138239124925594e-05, - "loss": 0.656, - "step": 194230 - }, - { - "epoch": 1.7171449283049558, - "grad_norm": 2.1612870693206787, - "learning_rate": 2.138091786158407e-05, - "loss": 0.6968, - "step": 194240 - }, - { - "epoch": 1.7172333315652681, - "grad_norm": 2.093449354171753, - "learning_rate": 2.13794444739122e-05, - "loss": 0.5092, - "step": 194250 - }, - { - "epoch": 1.7173217348255805, - "grad_norm": 1.7155430316925049, - "learning_rate": 2.137797108624033e-05, - "loss": 0.5043, - "step": 194260 - }, - { - "epoch": 1.7174101380858926, - "grad_norm": 0.9649681448936462, - "learning_rate": 2.1376497698568458e-05, - "loss": 0.4129, - "step": 194270 - }, - { - "epoch": 1.7174985413462047, - "grad_norm": 1.9646459817886353, - "learning_rate": 2.137502431089659e-05, - "loss": 0.5339, - "step": 194280 - }, - { - "epoch": 1.717586944606517, - "grad_norm": 3.8654212951660156, - "learning_rate": 2.1373550923224718e-05, - "loss": 0.4797, - "step": 194290 - }, - { - "epoch": 1.7176753478668294, - "grad_norm": 1.408607840538025, - "learning_rate": 2.1372077535552846e-05, - "loss": 0.6064, - "step": 194300 - }, - { - "epoch": 1.7177637511271415, - "grad_norm": 13.348353385925293, - "learning_rate": 2.1370604147880974e-05, - "loss": 0.6391, - "step": 194310 - }, - { - "epoch": 1.7178521543874536, - "grad_norm": 2.575873851776123, - "learning_rate": 2.1369130760209106e-05, - "loss": 0.4392, - "step": 194320 - }, - { - "epoch": 1.7179405576477662, - "grad_norm": 3.7794172763824463, - "learning_rate": 2.1367657372537234e-05, - "loss": 0.5417, - "step": 194330 - }, - { - "epoch": 1.7180289609080783, - "grad_norm": 2.6534876823425293, - "learning_rate": 2.1366183984865363e-05, - "loss": 0.6409, - "step": 194340 - }, - { - "epoch": 1.7181173641683904, - "grad_norm": 3.612494707107544, - "learning_rate": 2.1364710597193494e-05, - "loss": 0.4685, - "step": 194350 - }, - { - "epoch": 1.7182057674287028, - "grad_norm": 6.548025608062744, - "learning_rate": 2.1363237209521623e-05, - "loss": 0.676, - "step": 194360 - }, - { - "epoch": 1.7182941706890151, - "grad_norm": 5.572389125823975, - "learning_rate": 2.136176382184975e-05, - "loss": 0.5471, - "step": 194370 - }, - { - "epoch": 1.7183825739493273, - "grad_norm": 14.386319160461426, - "learning_rate": 2.136029043417788e-05, - "loss": 0.5738, - "step": 194380 - }, - { - "epoch": 1.7184709772096394, - "grad_norm": 1.8911259174346924, - "learning_rate": 2.135881704650601e-05, - "loss": 0.6273, - "step": 194390 - }, - { - "epoch": 1.7185593804699517, - "grad_norm": 2.1299264430999756, - "learning_rate": 2.135734365883414e-05, - "loss": 0.5588, - "step": 194400 - }, - { - "epoch": 1.718647783730264, - "grad_norm": 1.7233922481536865, - "learning_rate": 2.1355870271162268e-05, - "loss": 0.6437, - "step": 194410 - }, - { - "epoch": 1.7187361869905762, - "grad_norm": 2.2364232540130615, - "learning_rate": 2.1354396883490396e-05, - "loss": 0.586, - "step": 194420 - }, - { - "epoch": 1.7188245902508883, - "grad_norm": 1.1255460977554321, - "learning_rate": 2.1352923495818528e-05, - "loss": 0.5092, - "step": 194430 - }, - { - "epoch": 1.7189129935112009, - "grad_norm": 1.157018780708313, - "learning_rate": 2.1351450108146656e-05, - "loss": 0.5651, - "step": 194440 - }, - { - "epoch": 1.719001396771513, - "grad_norm": 5.953611850738525, - "learning_rate": 2.1349976720474785e-05, - "loss": 0.729, - "step": 194450 - }, - { - "epoch": 1.719089800031825, - "grad_norm": 2.7032785415649414, - "learning_rate": 2.1348503332802917e-05, - "loss": 0.5417, - "step": 194460 - }, - { - "epoch": 1.7191782032921374, - "grad_norm": 2.5386767387390137, - "learning_rate": 2.1347029945131045e-05, - "loss": 0.5638, - "step": 194470 - }, - { - "epoch": 1.7192666065524498, - "grad_norm": 2.9284324645996094, - "learning_rate": 2.1345556557459173e-05, - "loss": 0.5439, - "step": 194480 - }, - { - "epoch": 1.719355009812762, - "grad_norm": 3.7722442150115967, - "learning_rate": 2.13440831697873e-05, - "loss": 0.604, - "step": 194490 - }, - { - "epoch": 1.719443413073074, - "grad_norm": 1.712037444114685, - "learning_rate": 2.1342609782115433e-05, - "loss": 0.5204, - "step": 194500 - }, - { - "epoch": 1.7195318163333864, - "grad_norm": 2.1617226600646973, - "learning_rate": 2.1341136394443562e-05, - "loss": 0.5705, - "step": 194510 - }, - { - "epoch": 1.7196202195936987, - "grad_norm": 3.638930320739746, - "learning_rate": 2.133966300677169e-05, - "loss": 0.5451, - "step": 194520 - }, - { - "epoch": 1.7197086228540108, - "grad_norm": 5.074330806732178, - "learning_rate": 2.133818961909982e-05, - "loss": 0.5068, - "step": 194530 - }, - { - "epoch": 1.719797026114323, - "grad_norm": 2.370124340057373, - "learning_rate": 2.133671623142795e-05, - "loss": 0.4832, - "step": 194540 - }, - { - "epoch": 1.7198854293746353, - "grad_norm": 7.430358409881592, - "learning_rate": 2.133524284375608e-05, - "loss": 0.5092, - "step": 194550 - }, - { - "epoch": 1.7199738326349476, - "grad_norm": 2.030440330505371, - "learning_rate": 2.1333769456084207e-05, - "loss": 0.5819, - "step": 194560 - }, - { - "epoch": 1.7200622358952598, - "grad_norm": 5.374444961547852, - "learning_rate": 2.133229606841234e-05, - "loss": 0.6293, - "step": 194570 - }, - { - "epoch": 1.720150639155572, - "grad_norm": 1.5253618955612183, - "learning_rate": 2.1330822680740467e-05, - "loss": 0.6682, - "step": 194580 - }, - { - "epoch": 1.7202390424158844, - "grad_norm": 3.1249170303344727, - "learning_rate": 2.1329349293068595e-05, - "loss": 0.5649, - "step": 194590 - }, - { - "epoch": 1.7203274456761966, - "grad_norm": 3.138165235519409, - "learning_rate": 2.1327875905396724e-05, - "loss": 0.5689, - "step": 194600 - }, - { - "epoch": 1.7204158489365087, - "grad_norm": 2.1322529315948486, - "learning_rate": 2.1326402517724855e-05, - "loss": 0.7504, - "step": 194610 - }, - { - "epoch": 1.720504252196821, - "grad_norm": 2.534820079803467, - "learning_rate": 2.1324929130052984e-05, - "loss": 0.6296, - "step": 194620 - }, - { - "epoch": 1.7205926554571334, - "grad_norm": 1.5047125816345215, - "learning_rate": 2.1323455742381112e-05, - "loss": 0.4259, - "step": 194630 - }, - { - "epoch": 1.7206810587174455, - "grad_norm": 1.553313136100769, - "learning_rate": 2.1321982354709244e-05, - "loss": 0.5223, - "step": 194640 - }, - { - "epoch": 1.7207694619777576, - "grad_norm": 7.213515281677246, - "learning_rate": 2.1320508967037372e-05, - "loss": 0.577, - "step": 194650 - }, - { - "epoch": 1.72085786523807, - "grad_norm": 9.564608573913574, - "learning_rate": 2.13190355793655e-05, - "loss": 0.5102, - "step": 194660 - }, - { - "epoch": 1.7209462684983823, - "grad_norm": 7.004220485687256, - "learning_rate": 2.131756219169363e-05, - "loss": 0.6399, - "step": 194670 - }, - { - "epoch": 1.7210346717586944, - "grad_norm": 4.490629196166992, - "learning_rate": 2.131608880402176e-05, - "loss": 0.5615, - "step": 194680 - }, - { - "epoch": 1.7211230750190067, - "grad_norm": 2.593158006668091, - "learning_rate": 2.131461541634989e-05, - "loss": 0.5914, - "step": 194690 - }, - { - "epoch": 1.721211478279319, - "grad_norm": 1.4362741708755493, - "learning_rate": 2.1313142028678017e-05, - "loss": 0.5115, - "step": 194700 - }, - { - "epoch": 1.7212998815396312, - "grad_norm": 2.6229753494262695, - "learning_rate": 2.1311668641006146e-05, - "loss": 0.6462, - "step": 194710 - }, - { - "epoch": 1.7213882847999433, - "grad_norm": 2.605205535888672, - "learning_rate": 2.1310195253334277e-05, - "loss": 0.6794, - "step": 194720 - }, - { - "epoch": 1.7214766880602557, - "grad_norm": 9.741536140441895, - "learning_rate": 2.1308721865662406e-05, - "loss": 0.629, - "step": 194730 - }, - { - "epoch": 1.721565091320568, - "grad_norm": 3.437342882156372, - "learning_rate": 2.1307248477990534e-05, - "loss": 0.55, - "step": 194740 - }, - { - "epoch": 1.7216534945808801, - "grad_norm": 10.987372398376465, - "learning_rate": 2.1305775090318666e-05, - "loss": 0.5065, - "step": 194750 - }, - { - "epoch": 1.7217418978411922, - "grad_norm": 1.3480275869369507, - "learning_rate": 2.1304301702646794e-05, - "loss": 0.5772, - "step": 194760 - }, - { - "epoch": 1.7218303011015046, - "grad_norm": 4.841872692108154, - "learning_rate": 2.1302828314974923e-05, - "loss": 0.6665, - "step": 194770 - }, - { - "epoch": 1.721918704361817, - "grad_norm": 2.1356678009033203, - "learning_rate": 2.130135492730305e-05, - "loss": 0.672, - "step": 194780 - }, - { - "epoch": 1.722007107622129, - "grad_norm": 1.9766780138015747, - "learning_rate": 2.1299881539631183e-05, - "loss": 0.6122, - "step": 194790 - }, - { - "epoch": 1.7220955108824414, - "grad_norm": 2.7788548469543457, - "learning_rate": 2.129840815195931e-05, - "loss": 0.5378, - "step": 194800 - }, - { - "epoch": 1.7221839141427537, - "grad_norm": 2.4216575622558594, - "learning_rate": 2.129693476428744e-05, - "loss": 0.694, - "step": 194810 - }, - { - "epoch": 1.7222723174030659, - "grad_norm": 1.8145804405212402, - "learning_rate": 2.129546137661557e-05, - "loss": 0.619, - "step": 194820 - }, - { - "epoch": 1.722360720663378, - "grad_norm": 3.8577568531036377, - "learning_rate": 2.12939879889437e-05, - "loss": 0.6129, - "step": 194830 - }, - { - "epoch": 1.7224491239236903, - "grad_norm": 9.400105476379395, - "learning_rate": 2.1292514601271828e-05, - "loss": 0.6411, - "step": 194840 - }, - { - "epoch": 1.7225375271840027, - "grad_norm": 8.117342948913574, - "learning_rate": 2.1291041213599956e-05, - "loss": 0.5218, - "step": 194850 - }, - { - "epoch": 1.7226259304443148, - "grad_norm": 2.0788090229034424, - "learning_rate": 2.1289567825928088e-05, - "loss": 0.5991, - "step": 194860 - }, - { - "epoch": 1.722714333704627, - "grad_norm": 2.073340654373169, - "learning_rate": 2.1288094438256216e-05, - "loss": 0.5795, - "step": 194870 - }, - { - "epoch": 1.7228027369649392, - "grad_norm": 12.70473861694336, - "learning_rate": 2.1286621050584345e-05, - "loss": 0.5457, - "step": 194880 - }, - { - "epoch": 1.7228911402252516, - "grad_norm": 1.126299500465393, - "learning_rate": 2.1285147662912473e-05, - "loss": 0.5223, - "step": 194890 - }, - { - "epoch": 1.7229795434855637, - "grad_norm": 2.4783525466918945, - "learning_rate": 2.1283674275240605e-05, - "loss": 0.5983, - "step": 194900 - }, - { - "epoch": 1.7230679467458758, - "grad_norm": 1.0184361934661865, - "learning_rate": 2.1282200887568733e-05, - "loss": 0.6414, - "step": 194910 - }, - { - "epoch": 1.7231563500061884, - "grad_norm": 1.2930160760879517, - "learning_rate": 2.128072749989686e-05, - "loss": 0.5334, - "step": 194920 - }, - { - "epoch": 1.7232447532665005, - "grad_norm": 1.700271487236023, - "learning_rate": 2.1279254112224993e-05, - "loss": 0.6356, - "step": 194930 - }, - { - "epoch": 1.7233331565268126, - "grad_norm": 2.276177167892456, - "learning_rate": 2.127778072455312e-05, - "loss": 0.5209, - "step": 194940 - }, - { - "epoch": 1.723421559787125, - "grad_norm": 5.536886215209961, - "learning_rate": 2.127630733688125e-05, - "loss": 0.5808, - "step": 194950 - }, - { - "epoch": 1.7235099630474373, - "grad_norm": 5.371890068054199, - "learning_rate": 2.127483394920938e-05, - "loss": 0.6612, - "step": 194960 - }, - { - "epoch": 1.7235983663077494, - "grad_norm": 4.695826053619385, - "learning_rate": 2.127336056153751e-05, - "loss": 0.6523, - "step": 194970 - }, - { - "epoch": 1.7236867695680615, - "grad_norm": 5.159978866577148, - "learning_rate": 2.127188717386564e-05, - "loss": 0.5542, - "step": 194980 - }, - { - "epoch": 1.723775172828374, - "grad_norm": 4.265750408172607, - "learning_rate": 2.127041378619377e-05, - "loss": 0.5552, - "step": 194990 - }, - { - "epoch": 1.7238635760886862, - "grad_norm": 3.749016284942627, - "learning_rate": 2.12689403985219e-05, - "loss": 0.514, - "step": 195000 - }, - { - "epoch": 1.7239519793489984, - "grad_norm": 4.934724807739258, - "learning_rate": 2.1267467010850027e-05, - "loss": 0.6437, - "step": 195010 - }, - { - "epoch": 1.7240403826093105, - "grad_norm": 1.63783860206604, - "learning_rate": 2.126599362317816e-05, - "loss": 0.5651, - "step": 195020 - }, - { - "epoch": 1.724128785869623, - "grad_norm": 2.97623348236084, - "learning_rate": 2.1264520235506287e-05, - "loss": 0.6143, - "step": 195030 - }, - { - "epoch": 1.7242171891299352, - "grad_norm": 6.014468669891357, - "learning_rate": 2.1263046847834415e-05, - "loss": 0.542, - "step": 195040 - }, - { - "epoch": 1.7243055923902473, - "grad_norm": 3.3367233276367188, - "learning_rate": 2.1261573460162547e-05, - "loss": 0.7033, - "step": 195050 - }, - { - "epoch": 1.7243939956505596, - "grad_norm": 2.835598945617676, - "learning_rate": 2.1260100072490675e-05, - "loss": 0.5744, - "step": 195060 - }, - { - "epoch": 1.724482398910872, - "grad_norm": 5.4065680503845215, - "learning_rate": 2.1258626684818804e-05, - "loss": 0.5361, - "step": 195070 - }, - { - "epoch": 1.724570802171184, - "grad_norm": 6.708798408508301, - "learning_rate": 2.1257153297146935e-05, - "loss": 0.5408, - "step": 195080 - }, - { - "epoch": 1.7246592054314962, - "grad_norm": 2.620743751525879, - "learning_rate": 2.1255679909475064e-05, - "loss": 0.5281, - "step": 195090 - }, - { - "epoch": 1.7247476086918085, - "grad_norm": 3.543836832046509, - "learning_rate": 2.1254206521803192e-05, - "loss": 0.6236, - "step": 195100 - }, - { - "epoch": 1.7248360119521209, - "grad_norm": 0.964089035987854, - "learning_rate": 2.1252733134131324e-05, - "loss": 0.5193, - "step": 195110 - }, - { - "epoch": 1.724924415212433, - "grad_norm": 2.3067169189453125, - "learning_rate": 2.1251259746459452e-05, - "loss": 0.5311, - "step": 195120 - }, - { - "epoch": 1.7250128184727451, - "grad_norm": 9.584707260131836, - "learning_rate": 2.124978635878758e-05, - "loss": 0.5483, - "step": 195130 - }, - { - "epoch": 1.7251012217330575, - "grad_norm": 6.576661586761475, - "learning_rate": 2.124831297111571e-05, - "loss": 0.5107, - "step": 195140 - }, - { - "epoch": 1.7251896249933698, - "grad_norm": 2.2746684551239014, - "learning_rate": 2.124683958344384e-05, - "loss": 0.5469, - "step": 195150 - }, - { - "epoch": 1.725278028253682, - "grad_norm": 3.8254854679107666, - "learning_rate": 2.124536619577197e-05, - "loss": 0.4967, - "step": 195160 - }, - { - "epoch": 1.7253664315139943, - "grad_norm": 4.170722961425781, - "learning_rate": 2.1243892808100097e-05, - "loss": 0.6071, - "step": 195170 - }, - { - "epoch": 1.7254548347743066, - "grad_norm": 6.850671768188477, - "learning_rate": 2.1242419420428226e-05, - "loss": 0.8257, - "step": 195180 - }, - { - "epoch": 1.7255432380346187, - "grad_norm": 1.8608275651931763, - "learning_rate": 2.1240946032756358e-05, - "loss": 0.6186, - "step": 195190 - }, - { - "epoch": 1.7256316412949309, - "grad_norm": 2.613795757293701, - "learning_rate": 2.1239472645084486e-05, - "loss": 0.6071, - "step": 195200 - }, - { - "epoch": 1.7257200445552432, - "grad_norm": 3.4038357734680176, - "learning_rate": 2.1237999257412614e-05, - "loss": 0.6723, - "step": 195210 - }, - { - "epoch": 1.7258084478155555, - "grad_norm": 4.461121082305908, - "learning_rate": 2.1236525869740746e-05, - "loss": 0.6479, - "step": 195220 - }, - { - "epoch": 1.7258968510758677, - "grad_norm": 1.9412622451782227, - "learning_rate": 2.1235052482068874e-05, - "loss": 0.4966, - "step": 195230 - }, - { - "epoch": 1.7259852543361798, - "grad_norm": 1.07846999168396, - "learning_rate": 2.1233579094397003e-05, - "loss": 0.5164, - "step": 195240 - }, - { - "epoch": 1.7260736575964921, - "grad_norm": 2.48948073387146, - "learning_rate": 2.123210570672513e-05, - "loss": 0.5181, - "step": 195250 - }, - { - "epoch": 1.7261620608568045, - "grad_norm": 2.4837334156036377, - "learning_rate": 2.1230632319053263e-05, - "loss": 0.6371, - "step": 195260 - }, - { - "epoch": 1.7262504641171166, - "grad_norm": 1.6007318496704102, - "learning_rate": 2.122915893138139e-05, - "loss": 0.5292, - "step": 195270 - }, - { - "epoch": 1.726338867377429, - "grad_norm": 1.8158955574035645, - "learning_rate": 2.122768554370952e-05, - "loss": 0.6204, - "step": 195280 - }, - { - "epoch": 1.7264272706377413, - "grad_norm": 1.3074404001235962, - "learning_rate": 2.122621215603765e-05, - "loss": 0.4778, - "step": 195290 - }, - { - "epoch": 1.7265156738980534, - "grad_norm": 5.726093292236328, - "learning_rate": 2.122473876836578e-05, - "loss": 0.5694, - "step": 195300 - }, - { - "epoch": 1.7266040771583655, - "grad_norm": 4.886521816253662, - "learning_rate": 2.1223265380693908e-05, - "loss": 0.4615, - "step": 195310 - }, - { - "epoch": 1.7266924804186778, - "grad_norm": 2.211855411529541, - "learning_rate": 2.1221791993022036e-05, - "loss": 0.6406, - "step": 195320 - }, - { - "epoch": 1.7267808836789902, - "grad_norm": 0.9785746932029724, - "learning_rate": 2.1220318605350168e-05, - "loss": 0.5325, - "step": 195330 - }, - { - "epoch": 1.7268692869393023, - "grad_norm": 1.7503172159194946, - "learning_rate": 2.1218845217678296e-05, - "loss": 0.4948, - "step": 195340 - }, - { - "epoch": 1.7269576901996144, - "grad_norm": 18.286426544189453, - "learning_rate": 2.1217371830006425e-05, - "loss": 0.7423, - "step": 195350 - }, - { - "epoch": 1.7270460934599268, - "grad_norm": 1.8167378902435303, - "learning_rate": 2.1215898442334553e-05, - "loss": 0.471, - "step": 195360 - }, - { - "epoch": 1.727134496720239, - "grad_norm": 7.867949962615967, - "learning_rate": 2.1214425054662685e-05, - "loss": 0.6855, - "step": 195370 - }, - { - "epoch": 1.7272228999805512, - "grad_norm": 8.186702728271484, - "learning_rate": 2.1212951666990813e-05, - "loss": 0.615, - "step": 195380 - }, - { - "epoch": 1.7273113032408636, - "grad_norm": 9.494102478027344, - "learning_rate": 2.121147827931894e-05, - "loss": 0.5212, - "step": 195390 - }, - { - "epoch": 1.727399706501176, - "grad_norm": 2.0706982612609863, - "learning_rate": 2.1210004891647073e-05, - "loss": 0.5912, - "step": 195400 - }, - { - "epoch": 1.727488109761488, - "grad_norm": 4.010214328765869, - "learning_rate": 2.12085315039752e-05, - "loss": 0.6671, - "step": 195410 - }, - { - "epoch": 1.7275765130218002, - "grad_norm": 2.9555420875549316, - "learning_rate": 2.120705811630333e-05, - "loss": 0.6518, - "step": 195420 - }, - { - "epoch": 1.7276649162821125, - "grad_norm": 6.859008312225342, - "learning_rate": 2.120558472863146e-05, - "loss": 0.6816, - "step": 195430 - }, - { - "epoch": 1.7277533195424248, - "grad_norm": 1.4763985872268677, - "learning_rate": 2.120411134095959e-05, - "loss": 0.5363, - "step": 195440 - }, - { - "epoch": 1.727841722802737, - "grad_norm": 3.245969295501709, - "learning_rate": 2.120263795328772e-05, - "loss": 0.5138, - "step": 195450 - }, - { - "epoch": 1.727930126063049, - "grad_norm": 1.5190030336380005, - "learning_rate": 2.1201164565615847e-05, - "loss": 0.6454, - "step": 195460 - }, - { - "epoch": 1.7280185293233614, - "grad_norm": 4.291130065917969, - "learning_rate": 2.119969117794398e-05, - "loss": 0.5923, - "step": 195470 - }, - { - "epoch": 1.7281069325836738, - "grad_norm": 1.679487943649292, - "learning_rate": 2.1198217790272107e-05, - "loss": 0.6685, - "step": 195480 - }, - { - "epoch": 1.7281953358439859, - "grad_norm": 12.517325401306152, - "learning_rate": 2.1196744402600235e-05, - "loss": 0.503, - "step": 195490 - }, - { - "epoch": 1.728283739104298, - "grad_norm": 2.771240234375, - "learning_rate": 2.1195271014928364e-05, - "loss": 0.4449, - "step": 195500 - }, - { - "epoch": 1.7283721423646106, - "grad_norm": 7.492249011993408, - "learning_rate": 2.1193797627256495e-05, - "loss": 0.4944, - "step": 195510 - }, - { - "epoch": 1.7284605456249227, - "grad_norm": 2.683053970336914, - "learning_rate": 2.1192324239584624e-05, - "loss": 0.6942, - "step": 195520 - }, - { - "epoch": 1.7285489488852348, - "grad_norm": 2.708479404449463, - "learning_rate": 2.1190850851912752e-05, - "loss": 0.47, - "step": 195530 - }, - { - "epoch": 1.7286373521455471, - "grad_norm": 2.0322928428649902, - "learning_rate": 2.118937746424088e-05, - "loss": 0.681, - "step": 195540 - }, - { - "epoch": 1.7287257554058595, - "grad_norm": 2.9554851055145264, - "learning_rate": 2.1187904076569012e-05, - "loss": 0.6341, - "step": 195550 - }, - { - "epoch": 1.7288141586661716, - "grad_norm": 3.1124467849731445, - "learning_rate": 2.118643068889714e-05, - "loss": 0.7117, - "step": 195560 - }, - { - "epoch": 1.7289025619264837, - "grad_norm": 15.172920227050781, - "learning_rate": 2.118495730122527e-05, - "loss": 0.7122, - "step": 195570 - }, - { - "epoch": 1.728990965186796, - "grad_norm": 30.813711166381836, - "learning_rate": 2.11834839135534e-05, - "loss": 0.5538, - "step": 195580 - }, - { - "epoch": 1.7290793684471084, - "grad_norm": 0.8557159304618835, - "learning_rate": 2.118201052588153e-05, - "loss": 0.4769, - "step": 195590 - }, - { - "epoch": 1.7291677717074205, - "grad_norm": 3.094269275665283, - "learning_rate": 2.1180537138209657e-05, - "loss": 0.5318, - "step": 195600 - }, - { - "epoch": 1.7292561749677327, - "grad_norm": 2.590012788772583, - "learning_rate": 2.1179063750537786e-05, - "loss": 0.6028, - "step": 195610 - }, - { - "epoch": 1.7293445782280452, - "grad_norm": 2.8997645378112793, - "learning_rate": 2.1177590362865917e-05, - "loss": 0.5047, - "step": 195620 - }, - { - "epoch": 1.7294329814883573, - "grad_norm": 1.8133465051651, - "learning_rate": 2.1176116975194046e-05, - "loss": 0.4875, - "step": 195630 - }, - { - "epoch": 1.7295213847486695, - "grad_norm": 7.082368850708008, - "learning_rate": 2.1174643587522174e-05, - "loss": 0.518, - "step": 195640 - }, - { - "epoch": 1.7296097880089818, - "grad_norm": 1.881662368774414, - "learning_rate": 2.1173170199850302e-05, - "loss": 0.6293, - "step": 195650 - }, - { - "epoch": 1.7296981912692941, - "grad_norm": 1.6363252401351929, - "learning_rate": 2.1171696812178434e-05, - "loss": 0.6062, - "step": 195660 - }, - { - "epoch": 1.7297865945296063, - "grad_norm": 3.21171236038208, - "learning_rate": 2.1170223424506563e-05, - "loss": 0.5207, - "step": 195670 - }, - { - "epoch": 1.7298749977899184, - "grad_norm": 5.394152641296387, - "learning_rate": 2.116875003683469e-05, - "loss": 0.542, - "step": 195680 - }, - { - "epoch": 1.7299634010502307, - "grad_norm": 2.825296640396118, - "learning_rate": 2.1167276649162823e-05, - "loss": 0.5008, - "step": 195690 - }, - { - "epoch": 1.730051804310543, - "grad_norm": 9.6980562210083, - "learning_rate": 2.116580326149095e-05, - "loss": 0.5799, - "step": 195700 - }, - { - "epoch": 1.7301402075708552, - "grad_norm": 1.1438497304916382, - "learning_rate": 2.116432987381908e-05, - "loss": 0.7066, - "step": 195710 - }, - { - "epoch": 1.7302286108311673, - "grad_norm": 2.870929002761841, - "learning_rate": 2.1162856486147208e-05, - "loss": 0.6094, - "step": 195720 - }, - { - "epoch": 1.7303170140914796, - "grad_norm": 8.667716026306152, - "learning_rate": 2.116138309847534e-05, - "loss": 0.4956, - "step": 195730 - }, - { - "epoch": 1.730405417351792, - "grad_norm": 1.7962353229522705, - "learning_rate": 2.1159909710803468e-05, - "loss": 0.5723, - "step": 195740 - }, - { - "epoch": 1.730493820612104, - "grad_norm": 3.418757915496826, - "learning_rate": 2.1158436323131596e-05, - "loss": 0.5362, - "step": 195750 - }, - { - "epoch": 1.7305822238724164, - "grad_norm": 1.270337462425232, - "learning_rate": 2.1156962935459728e-05, - "loss": 0.5453, - "step": 195760 - }, - { - "epoch": 1.7306706271327288, - "grad_norm": 5.31475830078125, - "learning_rate": 2.1155489547787856e-05, - "loss": 0.5389, - "step": 195770 - }, - { - "epoch": 1.730759030393041, - "grad_norm": 2.0658061504364014, - "learning_rate": 2.1154016160115985e-05, - "loss": 0.4715, - "step": 195780 - }, - { - "epoch": 1.730847433653353, - "grad_norm": 1.0598411560058594, - "learning_rate": 2.1152542772444113e-05, - "loss": 0.5568, - "step": 195790 - }, - { - "epoch": 1.7309358369136654, - "grad_norm": 2.3994977474212646, - "learning_rate": 2.1151069384772245e-05, - "loss": 0.5859, - "step": 195800 - }, - { - "epoch": 1.7310242401739777, - "grad_norm": 1.717003345489502, - "learning_rate": 2.1149595997100373e-05, - "loss": 0.6225, - "step": 195810 - }, - { - "epoch": 1.7311126434342898, - "grad_norm": 1.4952044486999512, - "learning_rate": 2.11481226094285e-05, - "loss": 0.5733, - "step": 195820 - }, - { - "epoch": 1.731201046694602, - "grad_norm": 5.4516730308532715, - "learning_rate": 2.114664922175663e-05, - "loss": 0.5928, - "step": 195830 - }, - { - "epoch": 1.7312894499549143, - "grad_norm": 1.2333766222000122, - "learning_rate": 2.114517583408476e-05, - "loss": 0.5295, - "step": 195840 - }, - { - "epoch": 1.7313778532152266, - "grad_norm": 1.6348472833633423, - "learning_rate": 2.114370244641289e-05, - "loss": 0.549, - "step": 195850 - }, - { - "epoch": 1.7314662564755388, - "grad_norm": 1.1696728467941284, - "learning_rate": 2.1142229058741018e-05, - "loss": 0.5897, - "step": 195860 - }, - { - "epoch": 1.731554659735851, - "grad_norm": 1.7708359956741333, - "learning_rate": 2.114075567106915e-05, - "loss": 0.6673, - "step": 195870 - }, - { - "epoch": 1.7316430629961634, - "grad_norm": 12.028105735778809, - "learning_rate": 2.113928228339728e-05, - "loss": 0.6415, - "step": 195880 - }, - { - "epoch": 1.7317314662564756, - "grad_norm": 12.218727111816406, - "learning_rate": 2.1137808895725407e-05, - "loss": 0.5422, - "step": 195890 - }, - { - "epoch": 1.7318198695167877, - "grad_norm": 6.059319496154785, - "learning_rate": 2.113633550805354e-05, - "loss": 0.6705, - "step": 195900 - }, - { - "epoch": 1.7319082727771, - "grad_norm": 5.332113265991211, - "learning_rate": 2.1134862120381667e-05, - "loss": 0.471, - "step": 195910 - }, - { - "epoch": 1.7319966760374124, - "grad_norm": 1.3191802501678467, - "learning_rate": 2.1133388732709795e-05, - "loss": 0.514, - "step": 195920 - }, - { - "epoch": 1.7320850792977245, - "grad_norm": 1.0523746013641357, - "learning_rate": 2.1131915345037927e-05, - "loss": 0.5035, - "step": 195930 - }, - { - "epoch": 1.7321734825580366, - "grad_norm": 6.753158092498779, - "learning_rate": 2.1130441957366055e-05, - "loss": 0.5398, - "step": 195940 - }, - { - "epoch": 1.732261885818349, - "grad_norm": 4.173696517944336, - "learning_rate": 2.1128968569694184e-05, - "loss": 0.6315, - "step": 195950 - }, - { - "epoch": 1.7323502890786613, - "grad_norm": 9.682572364807129, - "learning_rate": 2.1127495182022315e-05, - "loss": 0.5357, - "step": 195960 - }, - { - "epoch": 1.7324386923389734, - "grad_norm": 4.160720348358154, - "learning_rate": 2.1126021794350444e-05, - "loss": 0.5762, - "step": 195970 - }, - { - "epoch": 1.7325270955992857, - "grad_norm": 2.6927499771118164, - "learning_rate": 2.1124548406678572e-05, - "loss": 0.7313, - "step": 195980 - }, - { - "epoch": 1.732615498859598, - "grad_norm": 1.5778392553329468, - "learning_rate": 2.1123075019006704e-05, - "loss": 0.6011, - "step": 195990 - }, - { - "epoch": 1.7327039021199102, - "grad_norm": 1.2303720712661743, - "learning_rate": 2.1121601631334832e-05, - "loss": 0.6368, - "step": 196000 - }, - { - "epoch": 1.7327923053802223, - "grad_norm": 4.567232131958008, - "learning_rate": 2.112012824366296e-05, - "loss": 0.5589, - "step": 196010 - }, - { - "epoch": 1.7328807086405347, - "grad_norm": 1.4383995532989502, - "learning_rate": 2.1118654855991092e-05, - "loss": 0.5802, - "step": 196020 - }, - { - "epoch": 1.732969111900847, - "grad_norm": 1.431085228919983, - "learning_rate": 2.111718146831922e-05, - "loss": 0.6278, - "step": 196030 - }, - { - "epoch": 1.7330575151611591, - "grad_norm": 1.2765882015228271, - "learning_rate": 2.111570808064735e-05, - "loss": 0.6553, - "step": 196040 - }, - { - "epoch": 1.7331459184214713, - "grad_norm": 2.260514974594116, - "learning_rate": 2.111423469297548e-05, - "loss": 0.5194, - "step": 196050 - }, - { - "epoch": 1.7332343216817836, - "grad_norm": 2.344637632369995, - "learning_rate": 2.111276130530361e-05, - "loss": 0.5624, - "step": 196060 - }, - { - "epoch": 1.733322724942096, - "grad_norm": 2.5157487392425537, - "learning_rate": 2.1111287917631737e-05, - "loss": 0.6148, - "step": 196070 - }, - { - "epoch": 1.733411128202408, - "grad_norm": 1.5370826721191406, - "learning_rate": 2.1109814529959866e-05, - "loss": 0.6209, - "step": 196080 - }, - { - "epoch": 1.7334995314627204, - "grad_norm": 3.3523473739624023, - "learning_rate": 2.1108341142287997e-05, - "loss": 0.5228, - "step": 196090 - }, - { - "epoch": 1.7335879347230327, - "grad_norm": 0.8930099606513977, - "learning_rate": 2.1106867754616126e-05, - "loss": 0.5291, - "step": 196100 - }, - { - "epoch": 1.7336763379833449, - "grad_norm": 1.5018413066864014, - "learning_rate": 2.1105394366944254e-05, - "loss": 0.533, - "step": 196110 - }, - { - "epoch": 1.733764741243657, - "grad_norm": 3.6461799144744873, - "learning_rate": 2.1103920979272383e-05, - "loss": 0.531, - "step": 196120 - }, - { - "epoch": 1.7338531445039693, - "grad_norm": 1.418461561203003, - "learning_rate": 2.1102447591600514e-05, - "loss": 0.4567, - "step": 196130 - }, - { - "epoch": 1.7339415477642817, - "grad_norm": 3.9982662200927734, - "learning_rate": 2.1100974203928643e-05, - "loss": 0.5638, - "step": 196140 - }, - { - "epoch": 1.7340299510245938, - "grad_norm": 1.4564203023910522, - "learning_rate": 2.109950081625677e-05, - "loss": 0.6685, - "step": 196150 - }, - { - "epoch": 1.734118354284906, - "grad_norm": 7.852879047393799, - "learning_rate": 2.1098027428584903e-05, - "loss": 0.5937, - "step": 196160 - }, - { - "epoch": 1.7342067575452182, - "grad_norm": 3.3024935722351074, - "learning_rate": 2.109655404091303e-05, - "loss": 0.557, - "step": 196170 - }, - { - "epoch": 1.7342951608055306, - "grad_norm": 6.282588958740234, - "learning_rate": 2.109508065324116e-05, - "loss": 0.602, - "step": 196180 - }, - { - "epoch": 1.7343835640658427, - "grad_norm": 2.669245481491089, - "learning_rate": 2.1093607265569288e-05, - "loss": 0.7253, - "step": 196190 - }, - { - "epoch": 1.7344719673261548, - "grad_norm": 1.9435648918151855, - "learning_rate": 2.109213387789742e-05, - "loss": 0.6093, - "step": 196200 - }, - { - "epoch": 1.7345603705864674, - "grad_norm": 1.8497153520584106, - "learning_rate": 2.1090660490225548e-05, - "loss": 0.611, - "step": 196210 - }, - { - "epoch": 1.7346487738467795, - "grad_norm": 3.0626959800720215, - "learning_rate": 2.1089187102553676e-05, - "loss": 0.5759, - "step": 196220 - }, - { - "epoch": 1.7347371771070916, - "grad_norm": 1.7463003396987915, - "learning_rate": 2.1087713714881808e-05, - "loss": 0.5655, - "step": 196230 - }, - { - "epoch": 1.734825580367404, - "grad_norm": 3.1084718704223633, - "learning_rate": 2.1086240327209936e-05, - "loss": 0.6622, - "step": 196240 - }, - { - "epoch": 1.7349139836277163, - "grad_norm": 5.808989524841309, - "learning_rate": 2.1084766939538065e-05, - "loss": 0.6504, - "step": 196250 - }, - { - "epoch": 1.7350023868880284, - "grad_norm": 1.4292551279067993, - "learning_rate": 2.1083293551866193e-05, - "loss": 0.6286, - "step": 196260 - }, - { - "epoch": 1.7350907901483406, - "grad_norm": 2.5943634510040283, - "learning_rate": 2.1081820164194325e-05, - "loss": 0.4967, - "step": 196270 - }, - { - "epoch": 1.735179193408653, - "grad_norm": 5.778375148773193, - "learning_rate": 2.1080346776522453e-05, - "loss": 0.6326, - "step": 196280 - }, - { - "epoch": 1.7352675966689652, - "grad_norm": 6.108475685119629, - "learning_rate": 2.107887338885058e-05, - "loss": 0.6184, - "step": 196290 - }, - { - "epoch": 1.7353559999292774, - "grad_norm": 2.5158615112304688, - "learning_rate": 2.107740000117871e-05, - "loss": 0.4978, - "step": 196300 - }, - { - "epoch": 1.7354444031895895, - "grad_norm": 1.2883144617080688, - "learning_rate": 2.107592661350684e-05, - "loss": 0.525, - "step": 196310 - }, - { - "epoch": 1.7355328064499018, - "grad_norm": 4.02616548538208, - "learning_rate": 2.107445322583497e-05, - "loss": 0.564, - "step": 196320 - }, - { - "epoch": 1.7356212097102142, - "grad_norm": 11.756324768066406, - "learning_rate": 2.10729798381631e-05, - "loss": 0.6965, - "step": 196330 - }, - { - "epoch": 1.7357096129705263, - "grad_norm": 0.6863888502120972, - "learning_rate": 2.107150645049123e-05, - "loss": 0.6371, - "step": 196340 - }, - { - "epoch": 1.7357980162308386, - "grad_norm": 5.1146721839904785, - "learning_rate": 2.107003306281936e-05, - "loss": 0.6121, - "step": 196350 - }, - { - "epoch": 1.735886419491151, - "grad_norm": 1.0052541494369507, - "learning_rate": 2.1068559675147487e-05, - "loss": 0.5339, - "step": 196360 - }, - { - "epoch": 1.735974822751463, - "grad_norm": 1.4069644212722778, - "learning_rate": 2.1067086287475615e-05, - "loss": 0.4263, - "step": 196370 - }, - { - "epoch": 1.7360632260117752, - "grad_norm": 1.6357277631759644, - "learning_rate": 2.1065612899803747e-05, - "loss": 0.5789, - "step": 196380 - }, - { - "epoch": 1.7361516292720875, - "grad_norm": 1.6332443952560425, - "learning_rate": 2.1064139512131875e-05, - "loss": 0.6643, - "step": 196390 - }, - { - "epoch": 1.7362400325324, - "grad_norm": 8.895979881286621, - "learning_rate": 2.1062666124460004e-05, - "loss": 0.6415, - "step": 196400 - }, - { - "epoch": 1.736328435792712, - "grad_norm": 2.421715497970581, - "learning_rate": 2.1061192736788135e-05, - "loss": 0.6306, - "step": 196410 - }, - { - "epoch": 1.7364168390530241, - "grad_norm": 2.148012161254883, - "learning_rate": 2.1059719349116264e-05, - "loss": 0.528, - "step": 196420 - }, - { - "epoch": 1.7365052423133365, - "grad_norm": 2.4306044578552246, - "learning_rate": 2.1058245961444392e-05, - "loss": 0.6064, - "step": 196430 - }, - { - "epoch": 1.7365936455736488, - "grad_norm": 1.142021656036377, - "learning_rate": 2.105677257377252e-05, - "loss": 0.4912, - "step": 196440 - }, - { - "epoch": 1.736682048833961, - "grad_norm": 2.4800100326538086, - "learning_rate": 2.1055299186100652e-05, - "loss": 0.6269, - "step": 196450 - }, - { - "epoch": 1.7367704520942733, - "grad_norm": 4.973618507385254, - "learning_rate": 2.105382579842878e-05, - "loss": 0.6908, - "step": 196460 - }, - { - "epoch": 1.7368588553545856, - "grad_norm": 2.3804619312286377, - "learning_rate": 2.105235241075691e-05, - "loss": 0.6208, - "step": 196470 - }, - { - "epoch": 1.7369472586148977, - "grad_norm": 3.174193859100342, - "learning_rate": 2.1050879023085037e-05, - "loss": 0.5373, - "step": 196480 - }, - { - "epoch": 1.7370356618752099, - "grad_norm": 1.7836205959320068, - "learning_rate": 2.104940563541317e-05, - "loss": 0.5188, - "step": 196490 - }, - { - "epoch": 1.7371240651355222, - "grad_norm": 2.734497308731079, - "learning_rate": 2.1047932247741297e-05, - "loss": 0.4601, - "step": 196500 - }, - { - "epoch": 1.7372124683958345, - "grad_norm": 7.385402679443359, - "learning_rate": 2.1046458860069426e-05, - "loss": 0.717, - "step": 196510 - }, - { - "epoch": 1.7373008716561467, - "grad_norm": 2.8061513900756836, - "learning_rate": 2.1044985472397557e-05, - "loss": 0.6686, - "step": 196520 - }, - { - "epoch": 1.7373892749164588, - "grad_norm": 1.9530673027038574, - "learning_rate": 2.1043512084725686e-05, - "loss": 0.6468, - "step": 196530 - }, - { - "epoch": 1.7374776781767711, - "grad_norm": 2.8552348613739014, - "learning_rate": 2.1042038697053814e-05, - "loss": 0.6425, - "step": 196540 - }, - { - "epoch": 1.7375660814370835, - "grad_norm": 2.1397485733032227, - "learning_rate": 2.1040565309381942e-05, - "loss": 0.5901, - "step": 196550 - }, - { - "epoch": 1.7376544846973956, - "grad_norm": 1.5400224924087524, - "learning_rate": 2.1039091921710074e-05, - "loss": 0.544, - "step": 196560 - }, - { - "epoch": 1.737742887957708, - "grad_norm": 2.6097347736358643, - "learning_rate": 2.1037618534038203e-05, - "loss": 0.6636, - "step": 196570 - }, - { - "epoch": 1.7378312912180203, - "grad_norm": 1.6473885774612427, - "learning_rate": 2.103614514636633e-05, - "loss": 0.6324, - "step": 196580 - }, - { - "epoch": 1.7379196944783324, - "grad_norm": 8.564188957214355, - "learning_rate": 2.103467175869446e-05, - "loss": 0.586, - "step": 196590 - }, - { - "epoch": 1.7380080977386445, - "grad_norm": 3.2612433433532715, - "learning_rate": 2.103319837102259e-05, - "loss": 0.5477, - "step": 196600 - }, - { - "epoch": 1.7380965009989569, - "grad_norm": 1.9359999895095825, - "learning_rate": 2.103172498335072e-05, - "loss": 0.5392, - "step": 196610 - }, - { - "epoch": 1.7381849042592692, - "grad_norm": 4.035454273223877, - "learning_rate": 2.1030251595678848e-05, - "loss": 0.6259, - "step": 196620 - }, - { - "epoch": 1.7382733075195813, - "grad_norm": 5.301139831542969, - "learning_rate": 2.102877820800698e-05, - "loss": 0.5815, - "step": 196630 - }, - { - "epoch": 1.7383617107798934, - "grad_norm": 2.0500972270965576, - "learning_rate": 2.1027304820335108e-05, - "loss": 0.5499, - "step": 196640 - }, - { - "epoch": 1.7384501140402058, - "grad_norm": 1.364989161491394, - "learning_rate": 2.1025831432663236e-05, - "loss": 0.5388, - "step": 196650 - }, - { - "epoch": 1.7385385173005181, - "grad_norm": 3.1312942504882812, - "learning_rate": 2.1024358044991364e-05, - "loss": 0.6786, - "step": 196660 - }, - { - "epoch": 1.7386269205608302, - "grad_norm": 1.338382601737976, - "learning_rate": 2.1022884657319496e-05, - "loss": 0.5909, - "step": 196670 - }, - { - "epoch": 1.7387153238211426, - "grad_norm": 9.556388854980469, - "learning_rate": 2.1021411269647625e-05, - "loss": 0.6866, - "step": 196680 - }, - { - "epoch": 1.738803727081455, - "grad_norm": 1.7506722211837769, - "learning_rate": 2.1019937881975753e-05, - "loss": 0.5025, - "step": 196690 - }, - { - "epoch": 1.738892130341767, - "grad_norm": 3.2631466388702393, - "learning_rate": 2.1018464494303885e-05, - "loss": 0.5898, - "step": 196700 - }, - { - "epoch": 1.7389805336020792, - "grad_norm": 1.8236184120178223, - "learning_rate": 2.1016991106632013e-05, - "loss": 0.4798, - "step": 196710 - }, - { - "epoch": 1.7390689368623915, - "grad_norm": 4.561819076538086, - "learning_rate": 2.101551771896014e-05, - "loss": 0.5659, - "step": 196720 - }, - { - "epoch": 1.7391573401227038, - "grad_norm": 11.79699420928955, - "learning_rate": 2.101404433128827e-05, - "loss": 0.4901, - "step": 196730 - }, - { - "epoch": 1.739245743383016, - "grad_norm": 1.3858580589294434, - "learning_rate": 2.10125709436164e-05, - "loss": 0.6426, - "step": 196740 - }, - { - "epoch": 1.739334146643328, - "grad_norm": 7.432936668395996, - "learning_rate": 2.101109755594453e-05, - "loss": 0.6957, - "step": 196750 - }, - { - "epoch": 1.7394225499036404, - "grad_norm": 3.0839948654174805, - "learning_rate": 2.1009624168272658e-05, - "loss": 0.5432, - "step": 196760 - }, - { - "epoch": 1.7395109531639528, - "grad_norm": 3.8584437370300293, - "learning_rate": 2.1008150780600787e-05, - "loss": 0.5606, - "step": 196770 - }, - { - "epoch": 1.7395993564242649, - "grad_norm": 2.1494619846343994, - "learning_rate": 2.1006677392928918e-05, - "loss": 0.6634, - "step": 196780 - }, - { - "epoch": 1.739687759684577, - "grad_norm": 1.4211783409118652, - "learning_rate": 2.1005204005257047e-05, - "loss": 0.519, - "step": 196790 - }, - { - "epoch": 1.7397761629448896, - "grad_norm": 2.011362314224243, - "learning_rate": 2.1003730617585175e-05, - "loss": 0.5824, - "step": 196800 - }, - { - "epoch": 1.7398645662052017, - "grad_norm": 1.8710979223251343, - "learning_rate": 2.1002257229913307e-05, - "loss": 0.5322, - "step": 196810 - }, - { - "epoch": 1.7399529694655138, - "grad_norm": 1.362769365310669, - "learning_rate": 2.1000783842241435e-05, - "loss": 0.6559, - "step": 196820 - }, - { - "epoch": 1.7400413727258262, - "grad_norm": 3.9178881645202637, - "learning_rate": 2.0999310454569563e-05, - "loss": 0.5809, - "step": 196830 - }, - { - "epoch": 1.7401297759861385, - "grad_norm": 3.1922249794006348, - "learning_rate": 2.0997837066897695e-05, - "loss": 0.6025, - "step": 196840 - }, - { - "epoch": 1.7402181792464506, - "grad_norm": 1.3927693367004395, - "learning_rate": 2.0996363679225824e-05, - "loss": 0.569, - "step": 196850 - }, - { - "epoch": 1.7403065825067627, - "grad_norm": 3.245332956314087, - "learning_rate": 2.0994890291553955e-05, - "loss": 0.6722, - "step": 196860 - }, - { - "epoch": 1.740394985767075, - "grad_norm": 0.815080463886261, - "learning_rate": 2.0993416903882084e-05, - "loss": 0.6002, - "step": 196870 - }, - { - "epoch": 1.7404833890273874, - "grad_norm": 6.448976993560791, - "learning_rate": 2.0991943516210212e-05, - "loss": 0.6991, - "step": 196880 - }, - { - "epoch": 1.7405717922876995, - "grad_norm": 1.5082669258117676, - "learning_rate": 2.0990470128538344e-05, - "loss": 0.406, - "step": 196890 - }, - { - "epoch": 1.7406601955480117, - "grad_norm": 18.35371208190918, - "learning_rate": 2.0988996740866472e-05, - "loss": 0.69, - "step": 196900 - }, - { - "epoch": 1.740748598808324, - "grad_norm": 1.7704530954360962, - "learning_rate": 2.09875233531946e-05, - "loss": 0.4987, - "step": 196910 - }, - { - "epoch": 1.7408370020686363, - "grad_norm": 2.0578227043151855, - "learning_rate": 2.0986049965522732e-05, - "loss": 0.5107, - "step": 196920 - }, - { - "epoch": 1.7409254053289485, - "grad_norm": 12.334599494934082, - "learning_rate": 2.098457657785086e-05, - "loss": 0.5618, - "step": 196930 - }, - { - "epoch": 1.7410138085892608, - "grad_norm": 1.5528627634048462, - "learning_rate": 2.098310319017899e-05, - "loss": 0.6015, - "step": 196940 - }, - { - "epoch": 1.7411022118495731, - "grad_norm": 4.1872100830078125, - "learning_rate": 2.0981629802507117e-05, - "loss": 0.4583, - "step": 196950 - }, - { - "epoch": 1.7411906151098853, - "grad_norm": 3.3384766578674316, - "learning_rate": 2.098015641483525e-05, - "loss": 0.6458, - "step": 196960 - }, - { - "epoch": 1.7412790183701974, - "grad_norm": 3.9110372066497803, - "learning_rate": 2.0978683027163377e-05, - "loss": 0.5932, - "step": 196970 - }, - { - "epoch": 1.7413674216305097, - "grad_norm": 10.663474082946777, - "learning_rate": 2.0977209639491506e-05, - "loss": 0.5076, - "step": 196980 - }, - { - "epoch": 1.741455824890822, - "grad_norm": 4.7137064933776855, - "learning_rate": 2.0975736251819637e-05, - "loss": 0.5824, - "step": 196990 - }, - { - "epoch": 1.7415442281511342, - "grad_norm": 0.7676690220832825, - "learning_rate": 2.0974262864147766e-05, - "loss": 0.4205, - "step": 197000 - }, - { - "epoch": 1.7416326314114463, - "grad_norm": 9.2551851272583, - "learning_rate": 2.0972789476475894e-05, - "loss": 0.6729, - "step": 197010 - }, - { - "epoch": 1.7417210346717586, - "grad_norm": 7.426784038543701, - "learning_rate": 2.0971316088804023e-05, - "loss": 0.6624, - "step": 197020 - }, - { - "epoch": 1.741809437932071, - "grad_norm": 0.8745737075805664, - "learning_rate": 2.0969842701132154e-05, - "loss": 0.5729, - "step": 197030 - }, - { - "epoch": 1.7418978411923831, - "grad_norm": 8.710653305053711, - "learning_rate": 2.0968369313460283e-05, - "loss": 0.5401, - "step": 197040 - }, - { - "epoch": 1.7419862444526955, - "grad_norm": 1.2940747737884521, - "learning_rate": 2.096689592578841e-05, - "loss": 0.5226, - "step": 197050 - }, - { - "epoch": 1.7420746477130078, - "grad_norm": 20.898895263671875, - "learning_rate": 2.0965422538116543e-05, - "loss": 0.634, - "step": 197060 - }, - { - "epoch": 1.74216305097332, - "grad_norm": 12.74487590789795, - "learning_rate": 2.096394915044467e-05, - "loss": 0.4975, - "step": 197070 - }, - { - "epoch": 1.742251454233632, - "grad_norm": 3.2381784915924072, - "learning_rate": 2.09624757627728e-05, - "loss": 0.4833, - "step": 197080 - }, - { - "epoch": 1.7423398574939444, - "grad_norm": 2.0809082984924316, - "learning_rate": 2.0961002375100928e-05, - "loss": 0.6267, - "step": 197090 - }, - { - "epoch": 1.7424282607542567, - "grad_norm": 1.789787769317627, - "learning_rate": 2.095952898742906e-05, - "loss": 0.4638, - "step": 197100 - }, - { - "epoch": 1.7425166640145688, - "grad_norm": 1.7888133525848389, - "learning_rate": 2.0958055599757188e-05, - "loss": 0.6171, - "step": 197110 - }, - { - "epoch": 1.742605067274881, - "grad_norm": 6.754447937011719, - "learning_rate": 2.0956582212085316e-05, - "loss": 0.5902, - "step": 197120 - }, - { - "epoch": 1.7426934705351933, - "grad_norm": 2.8703441619873047, - "learning_rate": 2.0955108824413445e-05, - "loss": 0.6575, - "step": 197130 - }, - { - "epoch": 1.7427818737955056, - "grad_norm": 25.108125686645508, - "learning_rate": 2.0953635436741576e-05, - "loss": 0.6874, - "step": 197140 - }, - { - "epoch": 1.7428702770558178, - "grad_norm": 3.9465975761413574, - "learning_rate": 2.0952162049069705e-05, - "loss": 0.6163, - "step": 197150 - }, - { - "epoch": 1.74295868031613, - "grad_norm": 8.941535949707031, - "learning_rate": 2.0950688661397833e-05, - "loss": 0.7202, - "step": 197160 - }, - { - "epoch": 1.7430470835764424, - "grad_norm": 0.8280032873153687, - "learning_rate": 2.0949215273725965e-05, - "loss": 0.4764, - "step": 197170 - }, - { - "epoch": 1.7431354868367546, - "grad_norm": 1.437925100326538, - "learning_rate": 2.0947741886054093e-05, - "loss": 0.5309, - "step": 197180 - }, - { - "epoch": 1.7432238900970667, - "grad_norm": 3.3845760822296143, - "learning_rate": 2.094626849838222e-05, - "loss": 0.5555, - "step": 197190 - }, - { - "epoch": 1.743312293357379, - "grad_norm": 6.526894569396973, - "learning_rate": 2.094479511071035e-05, - "loss": 0.6583, - "step": 197200 - }, - { - "epoch": 1.7434006966176914, - "grad_norm": 2.2649056911468506, - "learning_rate": 2.094332172303848e-05, - "loss": 0.6363, - "step": 197210 - }, - { - "epoch": 1.7434890998780035, - "grad_norm": 1.5738238096237183, - "learning_rate": 2.094184833536661e-05, - "loss": 0.7025, - "step": 197220 - }, - { - "epoch": 1.7435775031383156, - "grad_norm": 1.7489577531814575, - "learning_rate": 2.0940374947694738e-05, - "loss": 0.6186, - "step": 197230 - }, - { - "epoch": 1.743665906398628, - "grad_norm": 6.811504364013672, - "learning_rate": 2.0938901560022867e-05, - "loss": 0.5891, - "step": 197240 - }, - { - "epoch": 1.7437543096589403, - "grad_norm": 2.0611021518707275, - "learning_rate": 2.0937428172351e-05, - "loss": 0.5561, - "step": 197250 - }, - { - "epoch": 1.7438427129192524, - "grad_norm": 1.748860478401184, - "learning_rate": 2.0935954784679127e-05, - "loss": 0.6354, - "step": 197260 - }, - { - "epoch": 1.7439311161795648, - "grad_norm": 2.872514486312866, - "learning_rate": 2.0934481397007255e-05, - "loss": 0.6794, - "step": 197270 - }, - { - "epoch": 1.744019519439877, - "grad_norm": 2.2312440872192383, - "learning_rate": 2.0933008009335387e-05, - "loss": 0.63, - "step": 197280 - }, - { - "epoch": 1.7441079227001892, - "grad_norm": 2.638317584991455, - "learning_rate": 2.0931534621663515e-05, - "loss": 0.6223, - "step": 197290 - }, - { - "epoch": 1.7441963259605013, - "grad_norm": 1.5555953979492188, - "learning_rate": 2.0930061233991644e-05, - "loss": 0.513, - "step": 197300 - }, - { - "epoch": 1.7442847292208137, - "grad_norm": 1.947716474533081, - "learning_rate": 2.0928587846319772e-05, - "loss": 0.5422, - "step": 197310 - }, - { - "epoch": 1.744373132481126, - "grad_norm": 5.751101493835449, - "learning_rate": 2.0927114458647904e-05, - "loss": 0.6981, - "step": 197320 - }, - { - "epoch": 1.7444615357414381, - "grad_norm": 1.7628252506256104, - "learning_rate": 2.0925641070976032e-05, - "loss": 0.5721, - "step": 197330 - }, - { - "epoch": 1.7445499390017503, - "grad_norm": 3.8472437858581543, - "learning_rate": 2.092416768330416e-05, - "loss": 0.5763, - "step": 197340 - }, - { - "epoch": 1.7446383422620626, - "grad_norm": 3.0430450439453125, - "learning_rate": 2.0922694295632292e-05, - "loss": 0.6133, - "step": 197350 - }, - { - "epoch": 1.744726745522375, - "grad_norm": 1.3532829284667969, - "learning_rate": 2.092122090796042e-05, - "loss": 0.5364, - "step": 197360 - }, - { - "epoch": 1.744815148782687, - "grad_norm": 3.3958280086517334, - "learning_rate": 2.091974752028855e-05, - "loss": 0.6856, - "step": 197370 - }, - { - "epoch": 1.7449035520429992, - "grad_norm": 1.0810480117797852, - "learning_rate": 2.0918274132616677e-05, - "loss": 0.6368, - "step": 197380 - }, - { - "epoch": 1.7449919553033117, - "grad_norm": 3.335617780685425, - "learning_rate": 2.091680074494481e-05, - "loss": 0.4823, - "step": 197390 - }, - { - "epoch": 1.7450803585636239, - "grad_norm": 4.657704830169678, - "learning_rate": 2.0915327357272937e-05, - "loss": 0.5041, - "step": 197400 - }, - { - "epoch": 1.745168761823936, - "grad_norm": 2.925278663635254, - "learning_rate": 2.0913853969601066e-05, - "loss": 0.7198, - "step": 197410 - }, - { - "epoch": 1.7452571650842483, - "grad_norm": 2.435164451599121, - "learning_rate": 2.0912380581929194e-05, - "loss": 0.6114, - "step": 197420 - }, - { - "epoch": 1.7453455683445607, - "grad_norm": 2.3864328861236572, - "learning_rate": 2.0910907194257326e-05, - "loss": 0.4957, - "step": 197430 - }, - { - "epoch": 1.7454339716048728, - "grad_norm": 2.1344692707061768, - "learning_rate": 2.0909433806585454e-05, - "loss": 0.6309, - "step": 197440 - }, - { - "epoch": 1.745522374865185, - "grad_norm": 8.217358589172363, - "learning_rate": 2.0907960418913582e-05, - "loss": 0.5852, - "step": 197450 - }, - { - "epoch": 1.7456107781254973, - "grad_norm": 2.3924551010131836, - "learning_rate": 2.0906487031241714e-05, - "loss": 0.5538, - "step": 197460 - }, - { - "epoch": 1.7456991813858096, - "grad_norm": 2.210322380065918, - "learning_rate": 2.0905013643569842e-05, - "loss": 0.6097, - "step": 197470 - }, - { - "epoch": 1.7457875846461217, - "grad_norm": 4.129487991333008, - "learning_rate": 2.090354025589797e-05, - "loss": 0.6492, - "step": 197480 - }, - { - "epoch": 1.7458759879064338, - "grad_norm": 4.150144577026367, - "learning_rate": 2.09020668682261e-05, - "loss": 0.6553, - "step": 197490 - }, - { - "epoch": 1.7459643911667462, - "grad_norm": 6.629619598388672, - "learning_rate": 2.090059348055423e-05, - "loss": 0.5711, - "step": 197500 - }, - { - "epoch": 1.7460527944270585, - "grad_norm": 1.9283958673477173, - "learning_rate": 2.089912009288236e-05, - "loss": 0.443, - "step": 197510 - }, - { - "epoch": 1.7461411976873706, - "grad_norm": 2.020054578781128, - "learning_rate": 2.0897646705210488e-05, - "loss": 0.6163, - "step": 197520 - }, - { - "epoch": 1.746229600947683, - "grad_norm": 8.528481483459473, - "learning_rate": 2.089617331753862e-05, - "loss": 0.5935, - "step": 197530 - }, - { - "epoch": 1.7463180042079953, - "grad_norm": 1.652199149131775, - "learning_rate": 2.0894699929866748e-05, - "loss": 0.5647, - "step": 197540 - }, - { - "epoch": 1.7464064074683074, - "grad_norm": 5.253749370574951, - "learning_rate": 2.0893226542194876e-05, - "loss": 0.6569, - "step": 197550 - }, - { - "epoch": 1.7464948107286196, - "grad_norm": 1.7958537340164185, - "learning_rate": 2.0891753154523004e-05, - "loss": 0.5579, - "step": 197560 - }, - { - "epoch": 1.746583213988932, - "grad_norm": 3.0275492668151855, - "learning_rate": 2.0890279766851136e-05, - "loss": 0.6173, - "step": 197570 - }, - { - "epoch": 1.7466716172492442, - "grad_norm": 1.8350715637207031, - "learning_rate": 2.0888806379179265e-05, - "loss": 0.6338, - "step": 197580 - }, - { - "epoch": 1.7467600205095564, - "grad_norm": 3.038846015930176, - "learning_rate": 2.0887332991507393e-05, - "loss": 0.5396, - "step": 197590 - }, - { - "epoch": 1.7468484237698685, - "grad_norm": 11.514623641967773, - "learning_rate": 2.088585960383552e-05, - "loss": 0.5687, - "step": 197600 - }, - { - "epoch": 1.7469368270301808, - "grad_norm": 4.025920867919922, - "learning_rate": 2.0884386216163653e-05, - "loss": 0.6045, - "step": 197610 - }, - { - "epoch": 1.7470252302904932, - "grad_norm": 1.6115721464157104, - "learning_rate": 2.088291282849178e-05, - "loss": 0.5957, - "step": 197620 - }, - { - "epoch": 1.7471136335508053, - "grad_norm": 4.450847625732422, - "learning_rate": 2.088143944081991e-05, - "loss": 0.5725, - "step": 197630 - }, - { - "epoch": 1.7472020368111176, - "grad_norm": 0.9861357808113098, - "learning_rate": 2.087996605314804e-05, - "loss": 0.555, - "step": 197640 - }, - { - "epoch": 1.74729044007143, - "grad_norm": 1.5301084518432617, - "learning_rate": 2.087849266547617e-05, - "loss": 0.6504, - "step": 197650 - }, - { - "epoch": 1.747378843331742, - "grad_norm": 1.3909530639648438, - "learning_rate": 2.0877019277804298e-05, - "loss": 0.5335, - "step": 197660 - }, - { - "epoch": 1.7474672465920542, - "grad_norm": 1.4576557874679565, - "learning_rate": 2.0875545890132426e-05, - "loss": 0.5952, - "step": 197670 - }, - { - "epoch": 1.7475556498523666, - "grad_norm": 1.7314077615737915, - "learning_rate": 2.0874072502460558e-05, - "loss": 0.719, - "step": 197680 - }, - { - "epoch": 1.747644053112679, - "grad_norm": 1.678776741027832, - "learning_rate": 2.0872599114788687e-05, - "loss": 0.546, - "step": 197690 - }, - { - "epoch": 1.747732456372991, - "grad_norm": 2.1321935653686523, - "learning_rate": 2.0871125727116815e-05, - "loss": 0.5656, - "step": 197700 - }, - { - "epoch": 1.7478208596333031, - "grad_norm": 2.211000919342041, - "learning_rate": 2.0869652339444947e-05, - "loss": 0.6632, - "step": 197710 - }, - { - "epoch": 1.7479092628936155, - "grad_norm": 1.558192491531372, - "learning_rate": 2.0868178951773075e-05, - "loss": 0.5655, - "step": 197720 - }, - { - "epoch": 1.7479976661539278, - "grad_norm": 3.241481304168701, - "learning_rate": 2.0866705564101203e-05, - "loss": 0.5737, - "step": 197730 - }, - { - "epoch": 1.74808606941424, - "grad_norm": 2.6850011348724365, - "learning_rate": 2.0865232176429335e-05, - "loss": 0.4636, - "step": 197740 - }, - { - "epoch": 1.7481744726745523, - "grad_norm": 2.521146297454834, - "learning_rate": 2.0863758788757463e-05, - "loss": 0.498, - "step": 197750 - }, - { - "epoch": 1.7482628759348646, - "grad_norm": 13.72861385345459, - "learning_rate": 2.0862285401085592e-05, - "loss": 0.4228, - "step": 197760 - }, - { - "epoch": 1.7483512791951767, - "grad_norm": 3.342637777328491, - "learning_rate": 2.0860812013413724e-05, - "loss": 0.6127, - "step": 197770 - }, - { - "epoch": 1.7484396824554889, - "grad_norm": 8.745201110839844, - "learning_rate": 2.0859338625741852e-05, - "loss": 0.6358, - "step": 197780 - }, - { - "epoch": 1.7485280857158012, - "grad_norm": 3.7885406017303467, - "learning_rate": 2.085786523806998e-05, - "loss": 0.4518, - "step": 197790 - }, - { - "epoch": 1.7486164889761135, - "grad_norm": 2.9585280418395996, - "learning_rate": 2.0856391850398112e-05, - "loss": 0.6938, - "step": 197800 - }, - { - "epoch": 1.7487048922364257, - "grad_norm": 2.1558680534362793, - "learning_rate": 2.085491846272624e-05, - "loss": 0.6789, - "step": 197810 - }, - { - "epoch": 1.7487932954967378, - "grad_norm": 2.011660575866699, - "learning_rate": 2.085344507505437e-05, - "loss": 0.5276, - "step": 197820 - }, - { - "epoch": 1.7488816987570501, - "grad_norm": 1.6391394138336182, - "learning_rate": 2.08519716873825e-05, - "loss": 0.6125, - "step": 197830 - }, - { - "epoch": 1.7489701020173625, - "grad_norm": 1.2173222303390503, - "learning_rate": 2.085049829971063e-05, - "loss": 0.6754, - "step": 197840 - }, - { - "epoch": 1.7490585052776746, - "grad_norm": 2.6116750240325928, - "learning_rate": 2.0849024912038757e-05, - "loss": 0.6366, - "step": 197850 - }, - { - "epoch": 1.749146908537987, - "grad_norm": 16.526519775390625, - "learning_rate": 2.084755152436689e-05, - "loss": 0.5286, - "step": 197860 - }, - { - "epoch": 1.7492353117982993, - "grad_norm": 4.576642990112305, - "learning_rate": 2.0846078136695017e-05, - "loss": 0.6201, - "step": 197870 - }, - { - "epoch": 1.7493237150586114, - "grad_norm": 5.97806453704834, - "learning_rate": 2.0844604749023146e-05, - "loss": 0.5511, - "step": 197880 - }, - { - "epoch": 1.7494121183189235, - "grad_norm": 2.474228858947754, - "learning_rate": 2.0843131361351274e-05, - "loss": 0.6874, - "step": 197890 - }, - { - "epoch": 1.7495005215792359, - "grad_norm": 3.8049893379211426, - "learning_rate": 2.0841657973679406e-05, - "loss": 0.6463, - "step": 197900 - }, - { - "epoch": 1.7495889248395482, - "grad_norm": 2.4274582862854004, - "learning_rate": 2.0840184586007534e-05, - "loss": 0.5975, - "step": 197910 - }, - { - "epoch": 1.7496773280998603, - "grad_norm": 2.680166244506836, - "learning_rate": 2.0838711198335662e-05, - "loss": 0.5186, - "step": 197920 - }, - { - "epoch": 1.7497657313601724, - "grad_norm": 5.076358795166016, - "learning_rate": 2.0837237810663794e-05, - "loss": 0.6417, - "step": 197930 - }, - { - "epoch": 1.7498541346204848, - "grad_norm": 1.2135424613952637, - "learning_rate": 2.0835764422991923e-05, - "loss": 0.6321, - "step": 197940 - }, - { - "epoch": 1.7499425378807971, - "grad_norm": 3.6951210498809814, - "learning_rate": 2.083429103532005e-05, - "loss": 0.6703, - "step": 197950 - }, - { - "epoch": 1.7500309411411092, - "grad_norm": 1.8483201265335083, - "learning_rate": 2.083281764764818e-05, - "loss": 0.6007, - "step": 197960 - }, - { - "epoch": 1.7501193444014214, - "grad_norm": 2.100478410720825, - "learning_rate": 2.083134425997631e-05, - "loss": 0.6677, - "step": 197970 - }, - { - "epoch": 1.750207747661734, - "grad_norm": 3.2420291900634766, - "learning_rate": 2.082987087230444e-05, - "loss": 0.4966, - "step": 197980 - }, - { - "epoch": 1.750296150922046, - "grad_norm": 1.5824224948883057, - "learning_rate": 2.0828397484632568e-05, - "loss": 0.5962, - "step": 197990 - }, - { - "epoch": 1.7503845541823582, - "grad_norm": 8.216656684875488, - "learning_rate": 2.08269240969607e-05, - "loss": 0.4532, - "step": 198000 - }, - { - "epoch": 1.7504729574426705, - "grad_norm": 0.8512338399887085, - "learning_rate": 2.0825450709288828e-05, - "loss": 0.5657, - "step": 198010 - }, - { - "epoch": 1.7505613607029828, - "grad_norm": 5.59337043762207, - "learning_rate": 2.0823977321616956e-05, - "loss": 0.5933, - "step": 198020 - }, - { - "epoch": 1.750649763963295, - "grad_norm": 1.7913964986801147, - "learning_rate": 2.0822503933945085e-05, - "loss": 0.5116, - "step": 198030 - }, - { - "epoch": 1.750738167223607, - "grad_norm": 3.5717811584472656, - "learning_rate": 2.0821030546273216e-05, - "loss": 0.5585, - "step": 198040 - }, - { - "epoch": 1.7508265704839194, - "grad_norm": 3.144894599914551, - "learning_rate": 2.0819557158601345e-05, - "loss": 0.6281, - "step": 198050 - }, - { - "epoch": 1.7509149737442318, - "grad_norm": 2.6191606521606445, - "learning_rate": 2.0818083770929473e-05, - "loss": 0.5447, - "step": 198060 - }, - { - "epoch": 1.751003377004544, - "grad_norm": 0.7619302868843079, - "learning_rate": 2.08166103832576e-05, - "loss": 0.4921, - "step": 198070 - }, - { - "epoch": 1.751091780264856, - "grad_norm": 4.799787998199463, - "learning_rate": 2.0815136995585733e-05, - "loss": 0.5855, - "step": 198080 - }, - { - "epoch": 1.7511801835251684, - "grad_norm": 1.671536922454834, - "learning_rate": 2.081366360791386e-05, - "loss": 0.5948, - "step": 198090 - }, - { - "epoch": 1.7512685867854807, - "grad_norm": 3.216219425201416, - "learning_rate": 2.081219022024199e-05, - "loss": 0.5781, - "step": 198100 - }, - { - "epoch": 1.7513569900457928, - "grad_norm": 8.024598121643066, - "learning_rate": 2.081071683257012e-05, - "loss": 0.5581, - "step": 198110 - }, - { - "epoch": 1.7514453933061052, - "grad_norm": 1.9712045192718506, - "learning_rate": 2.080924344489825e-05, - "loss": 0.5852, - "step": 198120 - }, - { - "epoch": 1.7515337965664175, - "grad_norm": 2.13334321975708, - "learning_rate": 2.0807770057226378e-05, - "loss": 0.8064, - "step": 198130 - }, - { - "epoch": 1.7516221998267296, - "grad_norm": 15.331814765930176, - "learning_rate": 2.0806296669554507e-05, - "loss": 0.558, - "step": 198140 - }, - { - "epoch": 1.7517106030870417, - "grad_norm": 1.680092215538025, - "learning_rate": 2.080482328188264e-05, - "loss": 0.6171, - "step": 198150 - }, - { - "epoch": 1.751799006347354, - "grad_norm": 1.7920665740966797, - "learning_rate": 2.0803349894210767e-05, - "loss": 0.5709, - "step": 198160 - }, - { - "epoch": 1.7518874096076664, - "grad_norm": 2.1616902351379395, - "learning_rate": 2.0801876506538895e-05, - "loss": 0.6331, - "step": 198170 - }, - { - "epoch": 1.7519758128679785, - "grad_norm": 1.8493404388427734, - "learning_rate": 2.0800403118867027e-05, - "loss": 0.6714, - "step": 198180 - }, - { - "epoch": 1.7520642161282907, - "grad_norm": 5.895579814910889, - "learning_rate": 2.0798929731195155e-05, - "loss": 0.6431, - "step": 198190 - }, - { - "epoch": 1.752152619388603, - "grad_norm": 5.15779972076416, - "learning_rate": 2.0797456343523283e-05, - "loss": 0.7133, - "step": 198200 - }, - { - "epoch": 1.7522410226489153, - "grad_norm": 2.4788951873779297, - "learning_rate": 2.0795982955851412e-05, - "loss": 0.6498, - "step": 198210 - }, - { - "epoch": 1.7523294259092275, - "grad_norm": 2.990628957748413, - "learning_rate": 2.0794509568179544e-05, - "loss": 0.5041, - "step": 198220 - }, - { - "epoch": 1.7524178291695398, - "grad_norm": 2.803309202194214, - "learning_rate": 2.0793036180507672e-05, - "loss": 0.6219, - "step": 198230 - }, - { - "epoch": 1.7525062324298522, - "grad_norm": 2.890803337097168, - "learning_rate": 2.07915627928358e-05, - "loss": 0.658, - "step": 198240 - }, - { - "epoch": 1.7525946356901643, - "grad_norm": 9.967921257019043, - "learning_rate": 2.079008940516393e-05, - "loss": 0.469, - "step": 198250 - }, - { - "epoch": 1.7526830389504764, - "grad_norm": 1.3004199266433716, - "learning_rate": 2.078861601749206e-05, - "loss": 0.6036, - "step": 198260 - }, - { - "epoch": 1.7527714422107887, - "grad_norm": 1.2337579727172852, - "learning_rate": 2.078714262982019e-05, - "loss": 0.6715, - "step": 198270 - }, - { - "epoch": 1.752859845471101, - "grad_norm": 2.868938446044922, - "learning_rate": 2.0785669242148317e-05, - "loss": 0.6415, - "step": 198280 - }, - { - "epoch": 1.7529482487314132, - "grad_norm": 0.981107234954834, - "learning_rate": 2.078419585447645e-05, - "loss": 0.5801, - "step": 198290 - }, - { - "epoch": 1.7530366519917253, - "grad_norm": 8.54338550567627, - "learning_rate": 2.0782722466804577e-05, - "loss": 0.7166, - "step": 198300 - }, - { - "epoch": 1.7531250552520377, - "grad_norm": 1.221792221069336, - "learning_rate": 2.0781249079132706e-05, - "loss": 0.551, - "step": 198310 - }, - { - "epoch": 1.75321345851235, - "grad_norm": 4.302059650421143, - "learning_rate": 2.0779775691460834e-05, - "loss": 0.6789, - "step": 198320 - }, - { - "epoch": 1.7533018617726621, - "grad_norm": 5.496481418609619, - "learning_rate": 2.0778302303788966e-05, - "loss": 0.4865, - "step": 198330 - }, - { - "epoch": 1.7533902650329745, - "grad_norm": 4.6418986320495605, - "learning_rate": 2.0776828916117094e-05, - "loss": 0.6185, - "step": 198340 - }, - { - "epoch": 1.7534786682932868, - "grad_norm": 12.174217224121094, - "learning_rate": 2.0775355528445222e-05, - "loss": 0.6402, - "step": 198350 - }, - { - "epoch": 1.753567071553599, - "grad_norm": 4.296413421630859, - "learning_rate": 2.077388214077335e-05, - "loss": 0.671, - "step": 198360 - }, - { - "epoch": 1.753655474813911, - "grad_norm": 8.448339462280273, - "learning_rate": 2.0772408753101482e-05, - "loss": 0.5399, - "step": 198370 - }, - { - "epoch": 1.7537438780742234, - "grad_norm": 1.545261025428772, - "learning_rate": 2.077093536542961e-05, - "loss": 0.5635, - "step": 198380 - }, - { - "epoch": 1.7538322813345357, - "grad_norm": 12.022567749023438, - "learning_rate": 2.076946197775774e-05, - "loss": 0.6601, - "step": 198390 - }, - { - "epoch": 1.7539206845948478, - "grad_norm": 2.1051766872406006, - "learning_rate": 2.076798859008587e-05, - "loss": 0.4195, - "step": 198400 - }, - { - "epoch": 1.75400908785516, - "grad_norm": 1.4720613956451416, - "learning_rate": 2.0766515202414e-05, - "loss": 0.6752, - "step": 198410 - }, - { - "epoch": 1.7540974911154723, - "grad_norm": 1.620879054069519, - "learning_rate": 2.0765041814742128e-05, - "loss": 0.4582, - "step": 198420 - }, - { - "epoch": 1.7541858943757846, - "grad_norm": 1.0339560508728027, - "learning_rate": 2.0763568427070256e-05, - "loss": 0.5416, - "step": 198430 - }, - { - "epoch": 1.7542742976360968, - "grad_norm": 3.3532519340515137, - "learning_rate": 2.0762095039398388e-05, - "loss": 0.5899, - "step": 198440 - }, - { - "epoch": 1.754362700896409, - "grad_norm": 1.8915557861328125, - "learning_rate": 2.0760621651726516e-05, - "loss": 0.6154, - "step": 198450 - }, - { - "epoch": 1.7544511041567215, - "grad_norm": 3.085219144821167, - "learning_rate": 2.0759148264054644e-05, - "loss": 0.6711, - "step": 198460 - }, - { - "epoch": 1.7545395074170336, - "grad_norm": 2.022360324859619, - "learning_rate": 2.0757674876382776e-05, - "loss": 0.5663, - "step": 198470 - }, - { - "epoch": 1.7546279106773457, - "grad_norm": 7.102045059204102, - "learning_rate": 2.0756201488710904e-05, - "loss": 0.5613, - "step": 198480 - }, - { - "epoch": 1.754716313937658, - "grad_norm": 4.3691325187683105, - "learning_rate": 2.0754728101039033e-05, - "loss": 0.6342, - "step": 198490 - }, - { - "epoch": 1.7548047171979704, - "grad_norm": 1.8911253213882446, - "learning_rate": 2.075325471336716e-05, - "loss": 0.6014, - "step": 198500 - }, - { - "epoch": 1.7548931204582825, - "grad_norm": 0.9943745136260986, - "learning_rate": 2.0751781325695293e-05, - "loss": 0.5776, - "step": 198510 - }, - { - "epoch": 1.7549815237185946, - "grad_norm": 1.7579480409622192, - "learning_rate": 2.075030793802342e-05, - "loss": 0.5863, - "step": 198520 - }, - { - "epoch": 1.755069926978907, - "grad_norm": 1.8364801406860352, - "learning_rate": 2.074883455035155e-05, - "loss": 0.4875, - "step": 198530 - }, - { - "epoch": 1.7551583302392193, - "grad_norm": 4.80426549911499, - "learning_rate": 2.0747361162679678e-05, - "loss": 0.4999, - "step": 198540 - }, - { - "epoch": 1.7552467334995314, - "grad_norm": 3.127359390258789, - "learning_rate": 2.074588777500781e-05, - "loss": 0.4488, - "step": 198550 - }, - { - "epoch": 1.7553351367598435, - "grad_norm": 1.0734896659851074, - "learning_rate": 2.0744414387335938e-05, - "loss": 0.5554, - "step": 198560 - }, - { - "epoch": 1.755423540020156, - "grad_norm": 2.630678653717041, - "learning_rate": 2.0742940999664066e-05, - "loss": 0.5699, - "step": 198570 - }, - { - "epoch": 1.7555119432804682, - "grad_norm": 7.725094318389893, - "learning_rate": 2.0741467611992198e-05, - "loss": 0.6702, - "step": 198580 - }, - { - "epoch": 1.7556003465407803, - "grad_norm": 1.153080701828003, - "learning_rate": 2.0739994224320327e-05, - "loss": 0.6543, - "step": 198590 - }, - { - "epoch": 1.7556887498010927, - "grad_norm": 1.0754648447036743, - "learning_rate": 2.0738520836648455e-05, - "loss": 0.5494, - "step": 198600 - }, - { - "epoch": 1.755777153061405, - "grad_norm": 2.021418333053589, - "learning_rate": 2.0737047448976583e-05, - "loss": 0.5057, - "step": 198610 - }, - { - "epoch": 1.7558655563217171, - "grad_norm": 2.577563762664795, - "learning_rate": 2.0735574061304715e-05, - "loss": 0.6664, - "step": 198620 - }, - { - "epoch": 1.7559539595820293, - "grad_norm": 1.5344913005828857, - "learning_rate": 2.0734100673632843e-05, - "loss": 0.5796, - "step": 198630 - }, - { - "epoch": 1.7560423628423416, - "grad_norm": 4.062793731689453, - "learning_rate": 2.0732627285960972e-05, - "loss": 0.6136, - "step": 198640 - }, - { - "epoch": 1.756130766102654, - "grad_norm": 4.947664260864258, - "learning_rate": 2.0731153898289103e-05, - "loss": 0.6089, - "step": 198650 - }, - { - "epoch": 1.756219169362966, - "grad_norm": 1.329883337020874, - "learning_rate": 2.0729680510617232e-05, - "loss": 0.5791, - "step": 198660 - }, - { - "epoch": 1.7563075726232782, - "grad_norm": 2.21018385887146, - "learning_rate": 2.072820712294536e-05, - "loss": 0.617, - "step": 198670 - }, - { - "epoch": 1.7563959758835905, - "grad_norm": 10.442187309265137, - "learning_rate": 2.0726733735273492e-05, - "loss": 0.546, - "step": 198680 - }, - { - "epoch": 1.7564843791439029, - "grad_norm": 2.224388360977173, - "learning_rate": 2.072526034760162e-05, - "loss": 0.4472, - "step": 198690 - }, - { - "epoch": 1.756572782404215, - "grad_norm": 1.3046085834503174, - "learning_rate": 2.072378695992975e-05, - "loss": 0.4822, - "step": 198700 - }, - { - "epoch": 1.7566611856645273, - "grad_norm": 20.326618194580078, - "learning_rate": 2.072231357225788e-05, - "loss": 0.7144, - "step": 198710 - }, - { - "epoch": 1.7567495889248397, - "grad_norm": 2.609830141067505, - "learning_rate": 2.072084018458601e-05, - "loss": 0.5783, - "step": 198720 - }, - { - "epoch": 1.7568379921851518, - "grad_norm": 1.1245803833007812, - "learning_rate": 2.0719366796914137e-05, - "loss": 0.4985, - "step": 198730 - }, - { - "epoch": 1.756926395445464, - "grad_norm": 10.702179908752441, - "learning_rate": 2.071789340924227e-05, - "loss": 0.5078, - "step": 198740 - }, - { - "epoch": 1.7570147987057763, - "grad_norm": 2.8106372356414795, - "learning_rate": 2.0716420021570397e-05, - "loss": 0.5444, - "step": 198750 - }, - { - "epoch": 1.7571032019660886, - "grad_norm": 3.579751968383789, - "learning_rate": 2.0714946633898525e-05, - "loss": 0.6045, - "step": 198760 - }, - { - "epoch": 1.7571916052264007, - "grad_norm": 2.9466452598571777, - "learning_rate": 2.0713473246226657e-05, - "loss": 0.5714, - "step": 198770 - }, - { - "epoch": 1.7572800084867128, - "grad_norm": 5.917065143585205, - "learning_rate": 2.0711999858554786e-05, - "loss": 0.6135, - "step": 198780 - }, - { - "epoch": 1.7573684117470252, - "grad_norm": 1.6902612447738647, - "learning_rate": 2.0710526470882914e-05, - "loss": 0.5649, - "step": 198790 - }, - { - "epoch": 1.7574568150073375, - "grad_norm": 9.794297218322754, - "learning_rate": 2.0709053083211046e-05, - "loss": 0.5662, - "step": 198800 - }, - { - "epoch": 1.7575452182676496, - "grad_norm": 2.114468574523926, - "learning_rate": 2.0707579695539174e-05, - "loss": 0.6114, - "step": 198810 - }, - { - "epoch": 1.757633621527962, - "grad_norm": 3.443908452987671, - "learning_rate": 2.0706106307867302e-05, - "loss": 0.7409, - "step": 198820 - }, - { - "epoch": 1.7577220247882743, - "grad_norm": 4.537910461425781, - "learning_rate": 2.070463292019543e-05, - "loss": 0.6048, - "step": 198830 - }, - { - "epoch": 1.7578104280485864, - "grad_norm": 1.9517021179199219, - "learning_rate": 2.0703159532523562e-05, - "loss": 0.5075, - "step": 198840 - }, - { - "epoch": 1.7578988313088986, - "grad_norm": 0.9750070571899414, - "learning_rate": 2.070168614485169e-05, - "loss": 0.7622, - "step": 198850 - }, - { - "epoch": 1.757987234569211, - "grad_norm": 2.8106045722961426, - "learning_rate": 2.070021275717982e-05, - "loss": 0.6603, - "step": 198860 - }, - { - "epoch": 1.7580756378295233, - "grad_norm": 1.1765936613082886, - "learning_rate": 2.069873936950795e-05, - "loss": 0.5412, - "step": 198870 - }, - { - "epoch": 1.7581640410898354, - "grad_norm": 8.268555641174316, - "learning_rate": 2.069726598183608e-05, - "loss": 0.6645, - "step": 198880 - }, - { - "epoch": 1.7582524443501475, - "grad_norm": 1.3632766008377075, - "learning_rate": 2.0695792594164208e-05, - "loss": 0.5744, - "step": 198890 - }, - { - "epoch": 1.7583408476104598, - "grad_norm": 1.5119342803955078, - "learning_rate": 2.0694319206492336e-05, - "loss": 0.6186, - "step": 198900 - }, - { - "epoch": 1.7584292508707722, - "grad_norm": 1.6961848735809326, - "learning_rate": 2.0692845818820468e-05, - "loss": 0.5894, - "step": 198910 - }, - { - "epoch": 1.7585176541310843, - "grad_norm": 5.72066593170166, - "learning_rate": 2.0691372431148596e-05, - "loss": 0.6347, - "step": 198920 - }, - { - "epoch": 1.7586060573913966, - "grad_norm": 0.654653787612915, - "learning_rate": 2.0689899043476724e-05, - "loss": 0.5837, - "step": 198930 - }, - { - "epoch": 1.758694460651709, - "grad_norm": 4.1402668952941895, - "learning_rate": 2.0688425655804856e-05, - "loss": 0.6437, - "step": 198940 - }, - { - "epoch": 1.758782863912021, - "grad_norm": 2.6822288036346436, - "learning_rate": 2.0686952268132985e-05, - "loss": 0.6118, - "step": 198950 - }, - { - "epoch": 1.7588712671723332, - "grad_norm": 2.4100704193115234, - "learning_rate": 2.0685478880461113e-05, - "loss": 0.6775, - "step": 198960 - }, - { - "epoch": 1.7589596704326456, - "grad_norm": 4.156137943267822, - "learning_rate": 2.068400549278924e-05, - "loss": 0.5525, - "step": 198970 - }, - { - "epoch": 1.759048073692958, - "grad_norm": 2.91312837600708, - "learning_rate": 2.0682532105117373e-05, - "loss": 0.5631, - "step": 198980 - }, - { - "epoch": 1.75913647695327, - "grad_norm": 5.039311408996582, - "learning_rate": 2.06810587174455e-05, - "loss": 0.4401, - "step": 198990 - }, - { - "epoch": 1.7592248802135821, - "grad_norm": 5.100133895874023, - "learning_rate": 2.067958532977363e-05, - "loss": 0.5406, - "step": 199000 - }, - { - "epoch": 1.7593132834738945, - "grad_norm": 2.2148382663726807, - "learning_rate": 2.0678111942101758e-05, - "loss": 0.5846, - "step": 199010 - }, - { - "epoch": 1.7594016867342068, - "grad_norm": 3.281672239303589, - "learning_rate": 2.067663855442989e-05, - "loss": 0.5773, - "step": 199020 - }, - { - "epoch": 1.759490089994519, - "grad_norm": 2.9445834159851074, - "learning_rate": 2.0675165166758018e-05, - "loss": 0.6853, - "step": 199030 - }, - { - "epoch": 1.7595784932548313, - "grad_norm": 1.9498964548110962, - "learning_rate": 2.0673691779086147e-05, - "loss": 0.7405, - "step": 199040 - }, - { - "epoch": 1.7596668965151436, - "grad_norm": 7.0430169105529785, - "learning_rate": 2.0672218391414278e-05, - "loss": 0.6344, - "step": 199050 - }, - { - "epoch": 1.7597552997754558, - "grad_norm": 1.1378570795059204, - "learning_rate": 2.0670745003742407e-05, - "loss": 0.5853, - "step": 199060 - }, - { - "epoch": 1.7598437030357679, - "grad_norm": 1.6694504022598267, - "learning_rate": 2.0669271616070535e-05, - "loss": 0.6564, - "step": 199070 - }, - { - "epoch": 1.7599321062960802, - "grad_norm": 4.245837211608887, - "learning_rate": 2.0667798228398663e-05, - "loss": 0.5587, - "step": 199080 - }, - { - "epoch": 1.7600205095563926, - "grad_norm": 2.624056577682495, - "learning_rate": 2.0666324840726795e-05, - "loss": 0.5788, - "step": 199090 - }, - { - "epoch": 1.7601089128167047, - "grad_norm": 5.049633502960205, - "learning_rate": 2.0664851453054923e-05, - "loss": 0.55, - "step": 199100 - }, - { - "epoch": 1.7601973160770168, - "grad_norm": 8.455689430236816, - "learning_rate": 2.0663378065383052e-05, - "loss": 0.8119, - "step": 199110 - }, - { - "epoch": 1.7602857193373291, - "grad_norm": 4.369366645812988, - "learning_rate": 2.0661904677711184e-05, - "loss": 0.7053, - "step": 199120 - }, - { - "epoch": 1.7603741225976415, - "grad_norm": 2.9583048820495605, - "learning_rate": 2.0660431290039312e-05, - "loss": 0.4824, - "step": 199130 - }, - { - "epoch": 1.7604625258579536, - "grad_norm": 1.6215863227844238, - "learning_rate": 2.065895790236744e-05, - "loss": 0.4069, - "step": 199140 - }, - { - "epoch": 1.7605509291182657, - "grad_norm": 11.74459457397461, - "learning_rate": 2.065748451469557e-05, - "loss": 0.4307, - "step": 199150 - }, - { - "epoch": 1.7606393323785783, - "grad_norm": 1.71657395362854, - "learning_rate": 2.06560111270237e-05, - "loss": 0.5258, - "step": 199160 - }, - { - "epoch": 1.7607277356388904, - "grad_norm": 12.330255508422852, - "learning_rate": 2.065453773935183e-05, - "loss": 0.5932, - "step": 199170 - }, - { - "epoch": 1.7608161388992025, - "grad_norm": 2.850137710571289, - "learning_rate": 2.0653064351679957e-05, - "loss": 0.7165, - "step": 199180 - }, - { - "epoch": 1.7609045421595149, - "grad_norm": 1.646945595741272, - "learning_rate": 2.0651590964008085e-05, - "loss": 0.5961, - "step": 199190 - }, - { - "epoch": 1.7609929454198272, - "grad_norm": 1.6394953727722168, - "learning_rate": 2.0650117576336217e-05, - "loss": 0.5964, - "step": 199200 - }, - { - "epoch": 1.7610813486801393, - "grad_norm": 1.420682668685913, - "learning_rate": 2.0648644188664345e-05, - "loss": 0.5166, - "step": 199210 - }, - { - "epoch": 1.7611697519404514, - "grad_norm": 1.4977213144302368, - "learning_rate": 2.0647170800992474e-05, - "loss": 0.5271, - "step": 199220 - }, - { - "epoch": 1.7612581552007638, - "grad_norm": 1.1657967567443848, - "learning_rate": 2.0645697413320606e-05, - "loss": 0.585, - "step": 199230 - }, - { - "epoch": 1.7613465584610761, - "grad_norm": 4.959208011627197, - "learning_rate": 2.0644224025648734e-05, - "loss": 0.616, - "step": 199240 - }, - { - "epoch": 1.7614349617213882, - "grad_norm": 2.369187593460083, - "learning_rate": 2.0642750637976862e-05, - "loss": 0.5312, - "step": 199250 - }, - { - "epoch": 1.7615233649817004, - "grad_norm": 3.511531114578247, - "learning_rate": 2.064127725030499e-05, - "loss": 0.5762, - "step": 199260 - }, - { - "epoch": 1.7616117682420127, - "grad_norm": 1.6372212171554565, - "learning_rate": 2.0639803862633122e-05, - "loss": 0.6159, - "step": 199270 - }, - { - "epoch": 1.761700171502325, - "grad_norm": 2.4326720237731934, - "learning_rate": 2.063833047496125e-05, - "loss": 0.543, - "step": 199280 - }, - { - "epoch": 1.7617885747626372, - "grad_norm": 2.585983991622925, - "learning_rate": 2.063685708728938e-05, - "loss": 0.548, - "step": 199290 - }, - { - "epoch": 1.7618769780229495, - "grad_norm": 1.2142208814620972, - "learning_rate": 2.0635383699617507e-05, - "loss": 0.5861, - "step": 199300 - }, - { - "epoch": 1.7619653812832619, - "grad_norm": 5.508725166320801, - "learning_rate": 2.063391031194564e-05, - "loss": 0.5201, - "step": 199310 - }, - { - "epoch": 1.762053784543574, - "grad_norm": 3.581573724746704, - "learning_rate": 2.0632436924273768e-05, - "loss": 0.6495, - "step": 199320 - }, - { - "epoch": 1.762142187803886, - "grad_norm": 5.776298522949219, - "learning_rate": 2.0630963536601896e-05, - "loss": 0.5674, - "step": 199330 - }, - { - "epoch": 1.7622305910641984, - "grad_norm": 2.4548747539520264, - "learning_rate": 2.0629490148930028e-05, - "loss": 0.6964, - "step": 199340 - }, - { - "epoch": 1.7623189943245108, - "grad_norm": 2.6389870643615723, - "learning_rate": 2.0628016761258156e-05, - "loss": 0.6252, - "step": 199350 - }, - { - "epoch": 1.762407397584823, - "grad_norm": 6.058359622955322, - "learning_rate": 2.0626543373586284e-05, - "loss": 0.5675, - "step": 199360 - }, - { - "epoch": 1.762495800845135, - "grad_norm": 0.8558288812637329, - "learning_rate": 2.0625069985914413e-05, - "loss": 0.6076, - "step": 199370 - }, - { - "epoch": 1.7625842041054474, - "grad_norm": 5.600949287414551, - "learning_rate": 2.0623596598242544e-05, - "loss": 0.5871, - "step": 199380 - }, - { - "epoch": 1.7626726073657597, - "grad_norm": 2.670600414276123, - "learning_rate": 2.0622123210570673e-05, - "loss": 0.5181, - "step": 199390 - }, - { - "epoch": 1.7627610106260718, - "grad_norm": 2.0761938095092773, - "learning_rate": 2.06206498228988e-05, - "loss": 0.5393, - "step": 199400 - }, - { - "epoch": 1.7628494138863842, - "grad_norm": 3.9695093631744385, - "learning_rate": 2.0619176435226933e-05, - "loss": 0.6408, - "step": 199410 - }, - { - "epoch": 1.7629378171466965, - "grad_norm": 10.492831230163574, - "learning_rate": 2.061770304755506e-05, - "loss": 0.6675, - "step": 199420 - }, - { - "epoch": 1.7630262204070086, - "grad_norm": 4.89743185043335, - "learning_rate": 2.061622965988319e-05, - "loss": 0.5598, - "step": 199430 - }, - { - "epoch": 1.7631146236673207, - "grad_norm": 2.589043378829956, - "learning_rate": 2.0614756272211318e-05, - "loss": 0.653, - "step": 199440 - }, - { - "epoch": 1.763203026927633, - "grad_norm": 1.5037132501602173, - "learning_rate": 2.061328288453945e-05, - "loss": 0.6573, - "step": 199450 - }, - { - "epoch": 1.7632914301879454, - "grad_norm": 2.9797708988189697, - "learning_rate": 2.0611809496867578e-05, - "loss": 0.6749, - "step": 199460 - }, - { - "epoch": 1.7633798334482575, - "grad_norm": 1.983686089515686, - "learning_rate": 2.0610336109195706e-05, - "loss": 0.6909, - "step": 199470 - }, - { - "epoch": 1.7634682367085697, - "grad_norm": 1.4411038160324097, - "learning_rate": 2.0608862721523835e-05, - "loss": 0.5829, - "step": 199480 - }, - { - "epoch": 1.763556639968882, - "grad_norm": 1.2027361392974854, - "learning_rate": 2.0607389333851966e-05, - "loss": 0.5746, - "step": 199490 - }, - { - "epoch": 1.7636450432291944, - "grad_norm": 1.7734391689300537, - "learning_rate": 2.0605915946180095e-05, - "loss": 0.7385, - "step": 199500 - }, - { - "epoch": 1.7637334464895065, - "grad_norm": 1.9979277849197388, - "learning_rate": 2.0604442558508223e-05, - "loss": 0.5195, - "step": 199510 - }, - { - "epoch": 1.7638218497498188, - "grad_norm": 2.0859720706939697, - "learning_rate": 2.0602969170836355e-05, - "loss": 0.6381, - "step": 199520 - }, - { - "epoch": 1.7639102530101312, - "grad_norm": 2.4914655685424805, - "learning_rate": 2.0601495783164483e-05, - "loss": 0.545, - "step": 199530 - }, - { - "epoch": 1.7639986562704433, - "grad_norm": 2.902784824371338, - "learning_rate": 2.060002239549261e-05, - "loss": 0.4389, - "step": 199540 - }, - { - "epoch": 1.7640870595307554, - "grad_norm": 1.6960073709487915, - "learning_rate": 2.059854900782074e-05, - "loss": 0.619, - "step": 199550 - }, - { - "epoch": 1.7641754627910677, - "grad_norm": 2.486156940460205, - "learning_rate": 2.0597075620148872e-05, - "loss": 0.5329, - "step": 199560 - }, - { - "epoch": 1.76426386605138, - "grad_norm": 3.1031014919281006, - "learning_rate": 2.0595602232477e-05, - "loss": 0.5805, - "step": 199570 - }, - { - "epoch": 1.7643522693116922, - "grad_norm": 1.4590438604354858, - "learning_rate": 2.059412884480513e-05, - "loss": 0.6192, - "step": 199580 - }, - { - "epoch": 1.7644406725720043, - "grad_norm": 2.6685523986816406, - "learning_rate": 2.059265545713326e-05, - "loss": 0.5931, - "step": 199590 - }, - { - "epoch": 1.7645290758323167, - "grad_norm": 4.324309825897217, - "learning_rate": 2.059118206946139e-05, - "loss": 0.6099, - "step": 199600 - }, - { - "epoch": 1.764617479092629, - "grad_norm": 1.6499725580215454, - "learning_rate": 2.0589708681789517e-05, - "loss": 0.4791, - "step": 199610 - }, - { - "epoch": 1.7647058823529411, - "grad_norm": 2.315854072570801, - "learning_rate": 2.058823529411765e-05, - "loss": 0.6319, - "step": 199620 - }, - { - "epoch": 1.7647942856132535, - "grad_norm": 3.2634072303771973, - "learning_rate": 2.0586761906445777e-05, - "loss": 0.586, - "step": 199630 - }, - { - "epoch": 1.7648826888735658, - "grad_norm": 4.387502193450928, - "learning_rate": 2.0585288518773905e-05, - "loss": 0.5497, - "step": 199640 - }, - { - "epoch": 1.764971092133878, - "grad_norm": 2.499368190765381, - "learning_rate": 2.0583815131102037e-05, - "loss": 0.6791, - "step": 199650 - }, - { - "epoch": 1.76505949539419, - "grad_norm": 1.9002959728240967, - "learning_rate": 2.0582341743430165e-05, - "loss": 0.5873, - "step": 199660 - }, - { - "epoch": 1.7651478986545024, - "grad_norm": 11.747011184692383, - "learning_rate": 2.0580868355758294e-05, - "loss": 0.6854, - "step": 199670 - }, - { - "epoch": 1.7652363019148147, - "grad_norm": 3.0913946628570557, - "learning_rate": 2.0579394968086426e-05, - "loss": 0.6433, - "step": 199680 - }, - { - "epoch": 1.7653247051751269, - "grad_norm": 2.5673446655273438, - "learning_rate": 2.0577921580414554e-05, - "loss": 0.5589, - "step": 199690 - }, - { - "epoch": 1.765413108435439, - "grad_norm": 5.331379413604736, - "learning_rate": 2.0576448192742682e-05, - "loss": 0.5776, - "step": 199700 - }, - { - "epoch": 1.7655015116957513, - "grad_norm": 2.3399834632873535, - "learning_rate": 2.0574974805070814e-05, - "loss": 0.4646, - "step": 199710 - }, - { - "epoch": 1.7655899149560637, - "grad_norm": 5.019132137298584, - "learning_rate": 2.0573501417398942e-05, - "loss": 0.6931, - "step": 199720 - }, - { - "epoch": 1.7656783182163758, - "grad_norm": 10.349445343017578, - "learning_rate": 2.057202802972707e-05, - "loss": 0.6039, - "step": 199730 - }, - { - "epoch": 1.765766721476688, - "grad_norm": 17.549537658691406, - "learning_rate": 2.0570554642055202e-05, - "loss": 0.6005, - "step": 199740 - }, - { - "epoch": 1.7658551247370005, - "grad_norm": 2.8126816749572754, - "learning_rate": 2.056908125438333e-05, - "loss": 0.692, - "step": 199750 - }, - { - "epoch": 1.7659435279973126, - "grad_norm": 3.191575050354004, - "learning_rate": 2.056760786671146e-05, - "loss": 0.5962, - "step": 199760 - }, - { - "epoch": 1.7660319312576247, - "grad_norm": 2.158874750137329, - "learning_rate": 2.056613447903959e-05, - "loss": 0.7582, - "step": 199770 - }, - { - "epoch": 1.766120334517937, - "grad_norm": 1.3560339212417603, - "learning_rate": 2.056466109136772e-05, - "loss": 0.5423, - "step": 199780 - }, - { - "epoch": 1.7662087377782494, - "grad_norm": 2.2288689613342285, - "learning_rate": 2.0563187703695848e-05, - "loss": 0.6104, - "step": 199790 - }, - { - "epoch": 1.7662971410385615, - "grad_norm": 1.4673881530761719, - "learning_rate": 2.0561714316023976e-05, - "loss": 0.6268, - "step": 199800 - }, - { - "epoch": 1.7663855442988736, - "grad_norm": 1.5412003993988037, - "learning_rate": 2.0560240928352108e-05, - "loss": 0.5949, - "step": 199810 - }, - { - "epoch": 1.766473947559186, - "grad_norm": 1.4000840187072754, - "learning_rate": 2.0558767540680236e-05, - "loss": 0.5762, - "step": 199820 - }, - { - "epoch": 1.7665623508194983, - "grad_norm": 1.1794644594192505, - "learning_rate": 2.0557294153008364e-05, - "loss": 0.6181, - "step": 199830 - }, - { - "epoch": 1.7666507540798104, - "grad_norm": 4.9438676834106445, - "learning_rate": 2.0555820765336493e-05, - "loss": 0.5657, - "step": 199840 - }, - { - "epoch": 1.7667391573401225, - "grad_norm": 1.6453328132629395, - "learning_rate": 2.0554347377664625e-05, - "loss": 0.4458, - "step": 199850 - }, - { - "epoch": 1.766827560600435, - "grad_norm": 1.2419538497924805, - "learning_rate": 2.0552873989992753e-05, - "loss": 0.5685, - "step": 199860 - }, - { - "epoch": 1.7669159638607472, - "grad_norm": 1.0484174489974976, - "learning_rate": 2.055140060232088e-05, - "loss": 0.605, - "step": 199870 - }, - { - "epoch": 1.7670043671210593, - "grad_norm": 1.3979978561401367, - "learning_rate": 2.0549927214649013e-05, - "loss": 0.6089, - "step": 199880 - }, - { - "epoch": 1.7670927703813717, - "grad_norm": 2.2974133491516113, - "learning_rate": 2.054845382697714e-05, - "loss": 0.5482, - "step": 199890 - }, - { - "epoch": 1.767181173641684, - "grad_norm": 14.60004997253418, - "learning_rate": 2.054698043930527e-05, - "loss": 0.6268, - "step": 199900 - }, - { - "epoch": 1.7672695769019962, - "grad_norm": 4.126373291015625, - "learning_rate": 2.0545507051633398e-05, - "loss": 0.5838, - "step": 199910 - }, - { - "epoch": 1.7673579801623083, - "grad_norm": 3.4054360389709473, - "learning_rate": 2.054403366396153e-05, - "loss": 0.6252, - "step": 199920 - }, - { - "epoch": 1.7674463834226206, - "grad_norm": 6.496740818023682, - "learning_rate": 2.0542560276289658e-05, - "loss": 0.587, - "step": 199930 - }, - { - "epoch": 1.767534786682933, - "grad_norm": 2.177032947540283, - "learning_rate": 2.0541086888617786e-05, - "loss": 0.5029, - "step": 199940 - }, - { - "epoch": 1.767623189943245, - "grad_norm": 2.3139541149139404, - "learning_rate": 2.0539613500945915e-05, - "loss": 0.6176, - "step": 199950 - }, - { - "epoch": 1.7677115932035572, - "grad_norm": 8.146818161010742, - "learning_rate": 2.0538140113274047e-05, - "loss": 0.6634, - "step": 199960 - }, - { - "epoch": 1.7677999964638695, - "grad_norm": 1.7606213092803955, - "learning_rate": 2.0536666725602175e-05, - "loss": 0.5378, - "step": 199970 - }, - { - "epoch": 1.7678883997241819, - "grad_norm": 3.540778398513794, - "learning_rate": 2.0535193337930303e-05, - "loss": 0.6441, - "step": 199980 - }, - { - "epoch": 1.767976802984494, - "grad_norm": 4.727563858032227, - "learning_rate": 2.0533719950258435e-05, - "loss": 0.6939, - "step": 199990 - }, - { - "epoch": 1.7680652062448063, - "grad_norm": 2.876376152038574, - "learning_rate": 2.0532246562586563e-05, - "loss": 0.5114, - "step": 200000 - }, - { - "epoch": 1.7681536095051187, - "grad_norm": 2.6525161266326904, - "learning_rate": 2.0530773174914692e-05, - "loss": 0.5812, - "step": 200010 - }, - { - "epoch": 1.7682420127654308, - "grad_norm": 3.602836847305298, - "learning_rate": 2.052929978724282e-05, - "loss": 0.4607, - "step": 200020 - }, - { - "epoch": 1.768330416025743, - "grad_norm": 0.9413797855377197, - "learning_rate": 2.0527826399570952e-05, - "loss": 0.5367, - "step": 200030 - }, - { - "epoch": 1.7684188192860553, - "grad_norm": 2.601632833480835, - "learning_rate": 2.052635301189908e-05, - "loss": 0.664, - "step": 200040 - }, - { - "epoch": 1.7685072225463676, - "grad_norm": 7.349104881286621, - "learning_rate": 2.052487962422721e-05, - "loss": 0.5638, - "step": 200050 - }, - { - "epoch": 1.7685956258066797, - "grad_norm": 3.8090150356292725, - "learning_rate": 2.052340623655534e-05, - "loss": 0.5718, - "step": 200060 - }, - { - "epoch": 1.7686840290669918, - "grad_norm": 5.953371047973633, - "learning_rate": 2.052193284888347e-05, - "loss": 0.6368, - "step": 200070 - }, - { - "epoch": 1.7687724323273042, - "grad_norm": 3.4271018505096436, - "learning_rate": 2.0520459461211597e-05, - "loss": 0.5389, - "step": 200080 - }, - { - "epoch": 1.7688608355876165, - "grad_norm": 1.8100402355194092, - "learning_rate": 2.0518986073539725e-05, - "loss": 0.6928, - "step": 200090 - }, - { - "epoch": 1.7689492388479287, - "grad_norm": 8.461389541625977, - "learning_rate": 2.0517512685867857e-05, - "loss": 0.665, - "step": 200100 - }, - { - "epoch": 1.769037642108241, - "grad_norm": 3.565764904022217, - "learning_rate": 2.0516039298195985e-05, - "loss": 0.6226, - "step": 200110 - }, - { - "epoch": 1.7691260453685533, - "grad_norm": 1.8190014362335205, - "learning_rate": 2.0514565910524114e-05, - "loss": 0.5624, - "step": 200120 - }, - { - "epoch": 1.7692144486288655, - "grad_norm": 17.462963104248047, - "learning_rate": 2.0513092522852242e-05, - "loss": 0.4893, - "step": 200130 - }, - { - "epoch": 1.7693028518891776, - "grad_norm": 5.460578441619873, - "learning_rate": 2.0511619135180374e-05, - "loss": 0.5877, - "step": 200140 - }, - { - "epoch": 1.76939125514949, - "grad_norm": 3.6186656951904297, - "learning_rate": 2.0510145747508502e-05, - "loss": 0.6442, - "step": 200150 - }, - { - "epoch": 1.7694796584098023, - "grad_norm": 0.7906415462493896, - "learning_rate": 2.050867235983663e-05, - "loss": 0.6767, - "step": 200160 - }, - { - "epoch": 1.7695680616701144, - "grad_norm": 3.094832420349121, - "learning_rate": 2.0507198972164762e-05, - "loss": 0.6337, - "step": 200170 - }, - { - "epoch": 1.7696564649304265, - "grad_norm": 2.70182204246521, - "learning_rate": 2.050572558449289e-05, - "loss": 0.5912, - "step": 200180 - }, - { - "epoch": 1.7697448681907388, - "grad_norm": 2.904658079147339, - "learning_rate": 2.050425219682102e-05, - "loss": 0.5743, - "step": 200190 - }, - { - "epoch": 1.7698332714510512, - "grad_norm": 7.599824905395508, - "learning_rate": 2.0502778809149147e-05, - "loss": 0.5784, - "step": 200200 - }, - { - "epoch": 1.7699216747113633, - "grad_norm": 2.563567876815796, - "learning_rate": 2.050130542147728e-05, - "loss": 0.476, - "step": 200210 - }, - { - "epoch": 1.7700100779716756, - "grad_norm": 3.0157346725463867, - "learning_rate": 2.0499832033805407e-05, - "loss": 0.5543, - "step": 200220 - }, - { - "epoch": 1.770098481231988, - "grad_norm": 2.0201637744903564, - "learning_rate": 2.0498358646133536e-05, - "loss": 0.6047, - "step": 200230 - }, - { - "epoch": 1.7701868844923, - "grad_norm": 2.5668652057647705, - "learning_rate": 2.0496885258461668e-05, - "loss": 0.7256, - "step": 200240 - }, - { - "epoch": 1.7702752877526122, - "grad_norm": 2.552210569381714, - "learning_rate": 2.0495411870789796e-05, - "loss": 0.6268, - "step": 200250 - }, - { - "epoch": 1.7703636910129246, - "grad_norm": 9.171015739440918, - "learning_rate": 2.0493938483117924e-05, - "loss": 0.5236, - "step": 200260 - }, - { - "epoch": 1.770452094273237, - "grad_norm": 2.7152488231658936, - "learning_rate": 2.0492465095446053e-05, - "loss": 0.6378, - "step": 200270 - }, - { - "epoch": 1.770540497533549, - "grad_norm": 7.269675254821777, - "learning_rate": 2.0490991707774184e-05, - "loss": 0.5716, - "step": 200280 - }, - { - "epoch": 1.7706289007938611, - "grad_norm": 1.826249361038208, - "learning_rate": 2.0489518320102313e-05, - "loss": 0.4896, - "step": 200290 - }, - { - "epoch": 1.7707173040541735, - "grad_norm": 1.7940057516098022, - "learning_rate": 2.048804493243044e-05, - "loss": 0.6147, - "step": 200300 - }, - { - "epoch": 1.7708057073144858, - "grad_norm": 1.2506428956985474, - "learning_rate": 2.048657154475857e-05, - "loss": 0.6026, - "step": 200310 - }, - { - "epoch": 1.770894110574798, - "grad_norm": 5.991634368896484, - "learning_rate": 2.04850981570867e-05, - "loss": 0.4239, - "step": 200320 - }, - { - "epoch": 1.77098251383511, - "grad_norm": 8.469026565551758, - "learning_rate": 2.048362476941483e-05, - "loss": 0.5772, - "step": 200330 - }, - { - "epoch": 1.7710709170954226, - "grad_norm": 2.0407445430755615, - "learning_rate": 2.0482151381742958e-05, - "loss": 0.5582, - "step": 200340 - }, - { - "epoch": 1.7711593203557348, - "grad_norm": 1.7195005416870117, - "learning_rate": 2.048067799407109e-05, - "loss": 0.5878, - "step": 200350 - }, - { - "epoch": 1.7712477236160469, - "grad_norm": 1.9609400033950806, - "learning_rate": 2.0479204606399218e-05, - "loss": 0.4709, - "step": 200360 - }, - { - "epoch": 1.7713361268763592, - "grad_norm": 3.1894423961639404, - "learning_rate": 2.0477731218727346e-05, - "loss": 0.653, - "step": 200370 - }, - { - "epoch": 1.7714245301366716, - "grad_norm": 2.48327898979187, - "learning_rate": 2.0476257831055475e-05, - "loss": 0.6086, - "step": 200380 - }, - { - "epoch": 1.7715129333969837, - "grad_norm": 2.1797211170196533, - "learning_rate": 2.0474784443383606e-05, - "loss": 0.7471, - "step": 200390 - }, - { - "epoch": 1.7716013366572958, - "grad_norm": 1.8306154012680054, - "learning_rate": 2.0473311055711735e-05, - "loss": 0.6601, - "step": 200400 - }, - { - "epoch": 1.7716897399176081, - "grad_norm": 3.669273614883423, - "learning_rate": 2.0471837668039863e-05, - "loss": 0.5518, - "step": 200410 - }, - { - "epoch": 1.7717781431779205, - "grad_norm": 2.3546721935272217, - "learning_rate": 2.047036428036799e-05, - "loss": 0.6494, - "step": 200420 - }, - { - "epoch": 1.7718665464382326, - "grad_norm": 8.568611145019531, - "learning_rate": 2.0468890892696123e-05, - "loss": 0.6568, - "step": 200430 - }, - { - "epoch": 1.7719549496985447, - "grad_norm": 13.436927795410156, - "learning_rate": 2.046741750502425e-05, - "loss": 0.5846, - "step": 200440 - }, - { - "epoch": 1.7720433529588573, - "grad_norm": 23.046981811523438, - "learning_rate": 2.046594411735238e-05, - "loss": 0.6407, - "step": 200450 - }, - { - "epoch": 1.7721317562191694, - "grad_norm": 0.9065667986869812, - "learning_rate": 2.046447072968051e-05, - "loss": 0.5297, - "step": 200460 - }, - { - "epoch": 1.7722201594794815, - "grad_norm": 2.932445526123047, - "learning_rate": 2.046299734200864e-05, - "loss": 0.5539, - "step": 200470 - }, - { - "epoch": 1.7723085627397939, - "grad_norm": 6.372934341430664, - "learning_rate": 2.046152395433677e-05, - "loss": 0.5858, - "step": 200480 - }, - { - "epoch": 1.7723969660001062, - "grad_norm": 1.7840017080307007, - "learning_rate": 2.0460050566664897e-05, - "loss": 0.6135, - "step": 200490 - }, - { - "epoch": 1.7724853692604183, - "grad_norm": 5.092711448669434, - "learning_rate": 2.045857717899303e-05, - "loss": 0.5872, - "step": 200500 - }, - { - "epoch": 1.7725737725207305, - "grad_norm": 4.186411380767822, - "learning_rate": 2.0457103791321157e-05, - "loss": 0.6856, - "step": 200510 - }, - { - "epoch": 1.7726621757810428, - "grad_norm": 4.1263275146484375, - "learning_rate": 2.0455630403649285e-05, - "loss": 0.5244, - "step": 200520 - }, - { - "epoch": 1.7727505790413551, - "grad_norm": 1.3254998922348022, - "learning_rate": 2.0454157015977417e-05, - "loss": 0.5811, - "step": 200530 - }, - { - "epoch": 1.7728389823016673, - "grad_norm": 3.523235559463501, - "learning_rate": 2.0452683628305545e-05, - "loss": 0.7075, - "step": 200540 - }, - { - "epoch": 1.7729273855619794, - "grad_norm": 6.180381774902344, - "learning_rate": 2.0451210240633674e-05, - "loss": 0.4999, - "step": 200550 - }, - { - "epoch": 1.7730157888222917, - "grad_norm": 6.181248664855957, - "learning_rate": 2.0449736852961805e-05, - "loss": 0.6462, - "step": 200560 - }, - { - "epoch": 1.773104192082604, - "grad_norm": 2.5143160820007324, - "learning_rate": 2.0448263465289934e-05, - "loss": 0.6671, - "step": 200570 - }, - { - "epoch": 1.7731925953429162, - "grad_norm": 3.0866026878356934, - "learning_rate": 2.0446790077618062e-05, - "loss": 0.5643, - "step": 200580 - }, - { - "epoch": 1.7732809986032285, - "grad_norm": 3.386183500289917, - "learning_rate": 2.0445316689946194e-05, - "loss": 0.6616, - "step": 200590 - }, - { - "epoch": 1.7733694018635409, - "grad_norm": 8.194581985473633, - "learning_rate": 2.0443843302274322e-05, - "loss": 0.4982, - "step": 200600 - }, - { - "epoch": 1.773457805123853, - "grad_norm": 1.739749550819397, - "learning_rate": 2.044236991460245e-05, - "loss": 0.5744, - "step": 200610 - }, - { - "epoch": 1.773546208384165, - "grad_norm": 5.339415073394775, - "learning_rate": 2.0440896526930582e-05, - "loss": 0.678, - "step": 200620 - }, - { - "epoch": 1.7736346116444774, - "grad_norm": 2.5839920043945312, - "learning_rate": 2.043942313925871e-05, - "loss": 0.574, - "step": 200630 - }, - { - "epoch": 1.7737230149047898, - "grad_norm": 1.4898864030838013, - "learning_rate": 2.043794975158684e-05, - "loss": 0.7052, - "step": 200640 - }, - { - "epoch": 1.773811418165102, - "grad_norm": 1.386272668838501, - "learning_rate": 2.043647636391497e-05, - "loss": 0.4552, - "step": 200650 - }, - { - "epoch": 1.773899821425414, - "grad_norm": 1.7696417570114136, - "learning_rate": 2.04350029762431e-05, - "loss": 0.5599, - "step": 200660 - }, - { - "epoch": 1.7739882246857264, - "grad_norm": 2.029740333557129, - "learning_rate": 2.0433529588571227e-05, - "loss": 0.5294, - "step": 200670 - }, - { - "epoch": 1.7740766279460387, - "grad_norm": 12.559075355529785, - "learning_rate": 2.043205620089936e-05, - "loss": 0.5891, - "step": 200680 - }, - { - "epoch": 1.7741650312063508, - "grad_norm": 6.567686557769775, - "learning_rate": 2.0430582813227488e-05, - "loss": 0.6184, - "step": 200690 - }, - { - "epoch": 1.7742534344666632, - "grad_norm": 5.650191307067871, - "learning_rate": 2.0429109425555616e-05, - "loss": 0.5118, - "step": 200700 - }, - { - "epoch": 1.7743418377269755, - "grad_norm": 4.373534202575684, - "learning_rate": 2.0427636037883748e-05, - "loss": 0.482, - "step": 200710 - }, - { - "epoch": 1.7744302409872876, - "grad_norm": 2.541811466217041, - "learning_rate": 2.0426162650211876e-05, - "loss": 0.5296, - "step": 200720 - }, - { - "epoch": 1.7745186442475998, - "grad_norm": 7.511800289154053, - "learning_rate": 2.0424689262540004e-05, - "loss": 0.5839, - "step": 200730 - }, - { - "epoch": 1.774607047507912, - "grad_norm": 5.731058120727539, - "learning_rate": 2.0423215874868133e-05, - "loss": 0.5132, - "step": 200740 - }, - { - "epoch": 1.7746954507682244, - "grad_norm": 1.3647056818008423, - "learning_rate": 2.0421742487196264e-05, - "loss": 0.7385, - "step": 200750 - }, - { - "epoch": 1.7747838540285366, - "grad_norm": 1.5348931550979614, - "learning_rate": 2.0420269099524393e-05, - "loss": 0.6126, - "step": 200760 - }, - { - "epoch": 1.7748722572888487, - "grad_norm": 11.28753662109375, - "learning_rate": 2.041879571185252e-05, - "loss": 0.6272, - "step": 200770 - }, - { - "epoch": 1.774960660549161, - "grad_norm": 4.322978496551514, - "learning_rate": 2.041732232418065e-05, - "loss": 0.5384, - "step": 200780 - }, - { - "epoch": 1.7750490638094734, - "grad_norm": 1.7618731260299683, - "learning_rate": 2.041584893650878e-05, - "loss": 0.6147, - "step": 200790 - }, - { - "epoch": 1.7751374670697855, - "grad_norm": 16.58060073852539, - "learning_rate": 2.041437554883691e-05, - "loss": 0.649, - "step": 200800 - }, - { - "epoch": 1.7752258703300978, - "grad_norm": 1.241051197052002, - "learning_rate": 2.0412902161165038e-05, - "loss": 0.592, - "step": 200810 - }, - { - "epoch": 1.7753142735904102, - "grad_norm": 2.0913937091827393, - "learning_rate": 2.041142877349317e-05, - "loss": 0.7277, - "step": 200820 - }, - { - "epoch": 1.7754026768507223, - "grad_norm": 2.897819995880127, - "learning_rate": 2.0409955385821298e-05, - "loss": 0.6609, - "step": 200830 - }, - { - "epoch": 1.7754910801110344, - "grad_norm": 2.172450065612793, - "learning_rate": 2.0408481998149426e-05, - "loss": 0.6171, - "step": 200840 - }, - { - "epoch": 1.7755794833713467, - "grad_norm": 1.255169153213501, - "learning_rate": 2.0407008610477555e-05, - "loss": 0.5725, - "step": 200850 - }, - { - "epoch": 1.775667886631659, - "grad_norm": 1.731946587562561, - "learning_rate": 2.0405535222805687e-05, - "loss": 0.5666, - "step": 200860 - }, - { - "epoch": 1.7757562898919712, - "grad_norm": 4.914487838745117, - "learning_rate": 2.0404061835133815e-05, - "loss": 0.701, - "step": 200870 - }, - { - "epoch": 1.7758446931522833, - "grad_norm": 2.3087074756622314, - "learning_rate": 2.0402588447461943e-05, - "loss": 0.5716, - "step": 200880 - }, - { - "epoch": 1.7759330964125957, - "grad_norm": 1.1194977760314941, - "learning_rate": 2.040111505979007e-05, - "loss": 0.5854, - "step": 200890 - }, - { - "epoch": 1.776021499672908, - "grad_norm": 1.1009505987167358, - "learning_rate": 2.0399641672118203e-05, - "loss": 0.565, - "step": 200900 - }, - { - "epoch": 1.7761099029332201, - "grad_norm": 2.3220832347869873, - "learning_rate": 2.039816828444633e-05, - "loss": 0.5319, - "step": 200910 - }, - { - "epoch": 1.7761983061935325, - "grad_norm": 7.326570510864258, - "learning_rate": 2.039669489677446e-05, - "loss": 0.7092, - "step": 200920 - }, - { - "epoch": 1.7762867094538448, - "grad_norm": 2.641589641571045, - "learning_rate": 2.0395221509102592e-05, - "loss": 0.5615, - "step": 200930 - }, - { - "epoch": 1.776375112714157, - "grad_norm": 1.013283610343933, - "learning_rate": 2.039374812143072e-05, - "loss": 0.5636, - "step": 200940 - }, - { - "epoch": 1.776463515974469, - "grad_norm": 1.1645312309265137, - "learning_rate": 2.039227473375885e-05, - "loss": 0.5778, - "step": 200950 - }, - { - "epoch": 1.7765519192347814, - "grad_norm": 2.746457815170288, - "learning_rate": 2.0390801346086977e-05, - "loss": 0.572, - "step": 200960 - }, - { - "epoch": 1.7766403224950937, - "grad_norm": 4.176667213439941, - "learning_rate": 2.038932795841511e-05, - "loss": 0.5899, - "step": 200970 - }, - { - "epoch": 1.7767287257554059, - "grad_norm": 3.438382387161255, - "learning_rate": 2.0387854570743237e-05, - "loss": 0.5251, - "step": 200980 - }, - { - "epoch": 1.776817129015718, - "grad_norm": 3.557908535003662, - "learning_rate": 2.0386381183071365e-05, - "loss": 0.5072, - "step": 200990 - }, - { - "epoch": 1.7769055322760303, - "grad_norm": 6.518289089202881, - "learning_rate": 2.0384907795399497e-05, - "loss": 0.563, - "step": 201000 - }, - { - "epoch": 1.7769939355363427, - "grad_norm": 1.248594880104065, - "learning_rate": 2.0383434407727625e-05, - "loss": 0.5615, - "step": 201010 - }, - { - "epoch": 1.7770823387966548, - "grad_norm": 6.089591979980469, - "learning_rate": 2.0381961020055754e-05, - "loss": 0.5475, - "step": 201020 - }, - { - "epoch": 1.777170742056967, - "grad_norm": 11.350852012634277, - "learning_rate": 2.0380487632383882e-05, - "loss": 0.5913, - "step": 201030 - }, - { - "epoch": 1.7772591453172795, - "grad_norm": 1.9228618144989014, - "learning_rate": 2.0379014244712014e-05, - "loss": 0.5296, - "step": 201040 - }, - { - "epoch": 1.7773475485775916, - "grad_norm": 7.04586935043335, - "learning_rate": 2.0377540857040142e-05, - "loss": 0.6224, - "step": 201050 - }, - { - "epoch": 1.7774359518379037, - "grad_norm": 11.666397094726562, - "learning_rate": 2.037606746936827e-05, - "loss": 0.5715, - "step": 201060 - }, - { - "epoch": 1.777524355098216, - "grad_norm": 15.812749862670898, - "learning_rate": 2.03745940816964e-05, - "loss": 0.611, - "step": 201070 - }, - { - "epoch": 1.7776127583585284, - "grad_norm": 5.996926784515381, - "learning_rate": 2.037312069402453e-05, - "loss": 0.6422, - "step": 201080 - }, - { - "epoch": 1.7777011616188405, - "grad_norm": 1.75562584400177, - "learning_rate": 2.037164730635266e-05, - "loss": 0.5885, - "step": 201090 - }, - { - "epoch": 1.7777895648791526, - "grad_norm": 1.073387622833252, - "learning_rate": 2.0370173918680787e-05, - "loss": 0.6348, - "step": 201100 - }, - { - "epoch": 1.777877968139465, - "grad_norm": 5.5727996826171875, - "learning_rate": 2.036870053100892e-05, - "loss": 0.5176, - "step": 201110 - }, - { - "epoch": 1.7779663713997773, - "grad_norm": 1.9805892705917358, - "learning_rate": 2.0367227143337047e-05, - "loss": 0.694, - "step": 201120 - }, - { - "epoch": 1.7780547746600894, - "grad_norm": 3.733828544616699, - "learning_rate": 2.0365753755665176e-05, - "loss": 0.7036, - "step": 201130 - }, - { - "epoch": 1.7781431779204016, - "grad_norm": 5.081565856933594, - "learning_rate": 2.0364280367993304e-05, - "loss": 0.7147, - "step": 201140 - }, - { - "epoch": 1.778231581180714, - "grad_norm": 3.3621277809143066, - "learning_rate": 2.0362806980321436e-05, - "loss": 0.5959, - "step": 201150 - }, - { - "epoch": 1.7783199844410262, - "grad_norm": 2.002547025680542, - "learning_rate": 2.0361333592649564e-05, - "loss": 0.5854, - "step": 201160 - }, - { - "epoch": 1.7784083877013384, - "grad_norm": 1.1885132789611816, - "learning_rate": 2.0359860204977693e-05, - "loss": 0.6929, - "step": 201170 - }, - { - "epoch": 1.7784967909616507, - "grad_norm": 1.0890119075775146, - "learning_rate": 2.0358386817305824e-05, - "loss": 0.5461, - "step": 201180 - }, - { - "epoch": 1.778585194221963, - "grad_norm": 2.4033539295196533, - "learning_rate": 2.0356913429633953e-05, - "loss": 0.5506, - "step": 201190 - }, - { - "epoch": 1.7786735974822752, - "grad_norm": 4.361912727355957, - "learning_rate": 2.035544004196208e-05, - "loss": 0.6374, - "step": 201200 - }, - { - "epoch": 1.7787620007425873, - "grad_norm": 1.5721828937530518, - "learning_rate": 2.035396665429021e-05, - "loss": 0.5158, - "step": 201210 - }, - { - "epoch": 1.7788504040028996, - "grad_norm": 3.3955421447753906, - "learning_rate": 2.035249326661834e-05, - "loss": 0.8143, - "step": 201220 - }, - { - "epoch": 1.778938807263212, - "grad_norm": 2.908384084701538, - "learning_rate": 2.035101987894647e-05, - "loss": 0.6386, - "step": 201230 - }, - { - "epoch": 1.779027210523524, - "grad_norm": 4.739225387573242, - "learning_rate": 2.0349546491274598e-05, - "loss": 0.674, - "step": 201240 - }, - { - "epoch": 1.7791156137838362, - "grad_norm": 2.748955488204956, - "learning_rate": 2.0348073103602726e-05, - "loss": 0.5438, - "step": 201250 - }, - { - "epoch": 1.7792040170441485, - "grad_norm": 2.6617844104766846, - "learning_rate": 2.0346599715930858e-05, - "loss": 0.6165, - "step": 201260 - }, - { - "epoch": 1.7792924203044609, - "grad_norm": 6.649594306945801, - "learning_rate": 2.0345126328258986e-05, - "loss": 0.4528, - "step": 201270 - }, - { - "epoch": 1.779380823564773, - "grad_norm": 2.950157880783081, - "learning_rate": 2.0343652940587115e-05, - "loss": 0.6148, - "step": 201280 - }, - { - "epoch": 1.7794692268250853, - "grad_norm": 1.084587574005127, - "learning_rate": 2.0342179552915246e-05, - "loss": 0.5, - "step": 201290 - }, - { - "epoch": 1.7795576300853977, - "grad_norm": 0.8753800392150879, - "learning_rate": 2.0340706165243375e-05, - "loss": 0.527, - "step": 201300 - }, - { - "epoch": 1.7796460333457098, - "grad_norm": 1.71090829372406, - "learning_rate": 2.0339232777571503e-05, - "loss": 0.5337, - "step": 201310 - }, - { - "epoch": 1.779734436606022, - "grad_norm": 2.7448437213897705, - "learning_rate": 2.033775938989963e-05, - "loss": 0.5433, - "step": 201320 - }, - { - "epoch": 1.7798228398663343, - "grad_norm": 4.972217082977295, - "learning_rate": 2.0336286002227763e-05, - "loss": 0.7051, - "step": 201330 - }, - { - "epoch": 1.7799112431266466, - "grad_norm": 2.407299518585205, - "learning_rate": 2.033481261455589e-05, - "loss": 0.6354, - "step": 201340 - }, - { - "epoch": 1.7799996463869587, - "grad_norm": 15.180646896362305, - "learning_rate": 2.033333922688402e-05, - "loss": 0.6935, - "step": 201350 - }, - { - "epoch": 1.7800880496472709, - "grad_norm": 1.1638933420181274, - "learning_rate": 2.033186583921215e-05, - "loss": 0.6537, - "step": 201360 - }, - { - "epoch": 1.7801764529075832, - "grad_norm": 2.0126402378082275, - "learning_rate": 2.033039245154028e-05, - "loss": 0.5021, - "step": 201370 - }, - { - "epoch": 1.7802648561678955, - "grad_norm": 1.1321524381637573, - "learning_rate": 2.032891906386841e-05, - "loss": 0.4838, - "step": 201380 - }, - { - "epoch": 1.7803532594282077, - "grad_norm": 4.253654956817627, - "learning_rate": 2.0327445676196537e-05, - "loss": 0.6997, - "step": 201390 - }, - { - "epoch": 1.78044166268852, - "grad_norm": 3.022303581237793, - "learning_rate": 2.032597228852467e-05, - "loss": 0.5443, - "step": 201400 - }, - { - "epoch": 1.7805300659488323, - "grad_norm": 3.3942525386810303, - "learning_rate": 2.0324498900852797e-05, - "loss": 0.479, - "step": 201410 - }, - { - "epoch": 1.7806184692091445, - "grad_norm": 6.061913967132568, - "learning_rate": 2.0323025513180925e-05, - "loss": 0.5148, - "step": 201420 - }, - { - "epoch": 1.7807068724694566, - "grad_norm": 3.260727882385254, - "learning_rate": 2.0321552125509054e-05, - "loss": 0.6581, - "step": 201430 - }, - { - "epoch": 1.780795275729769, - "grad_norm": 5.648933410644531, - "learning_rate": 2.0320078737837185e-05, - "loss": 0.6565, - "step": 201440 - }, - { - "epoch": 1.7808836789900813, - "grad_norm": 2.1177427768707275, - "learning_rate": 2.0318605350165314e-05, - "loss": 0.5784, - "step": 201450 - }, - { - "epoch": 1.7809720822503934, - "grad_norm": 3.3396449089050293, - "learning_rate": 2.0317131962493442e-05, - "loss": 0.6284, - "step": 201460 - }, - { - "epoch": 1.7810604855107055, - "grad_norm": 1.3688477277755737, - "learning_rate": 2.0315658574821574e-05, - "loss": 0.5571, - "step": 201470 - }, - { - "epoch": 1.7811488887710178, - "grad_norm": 1.3339372873306274, - "learning_rate": 2.0314185187149702e-05, - "loss": 0.477, - "step": 201480 - }, - { - "epoch": 1.7812372920313302, - "grad_norm": 23.196277618408203, - "learning_rate": 2.031271179947783e-05, - "loss": 0.5992, - "step": 201490 - }, - { - "epoch": 1.7813256952916423, - "grad_norm": 4.477624893188477, - "learning_rate": 2.0311238411805962e-05, - "loss": 0.6204, - "step": 201500 - }, - { - "epoch": 1.7814140985519547, - "grad_norm": 4.52294921875, - "learning_rate": 2.030976502413409e-05, - "loss": 0.6913, - "step": 201510 - }, - { - "epoch": 1.781502501812267, - "grad_norm": 1.8961204290390015, - "learning_rate": 2.0308291636462222e-05, - "loss": 0.4341, - "step": 201520 - }, - { - "epoch": 1.7815909050725791, - "grad_norm": 3.135321617126465, - "learning_rate": 2.030681824879035e-05, - "loss": 0.5982, - "step": 201530 - }, - { - "epoch": 1.7816793083328912, - "grad_norm": 1.6723718643188477, - "learning_rate": 2.030534486111848e-05, - "loss": 0.6095, - "step": 201540 - }, - { - "epoch": 1.7817677115932036, - "grad_norm": 9.863723754882812, - "learning_rate": 2.030387147344661e-05, - "loss": 0.5759, - "step": 201550 - }, - { - "epoch": 1.781856114853516, - "grad_norm": 3.2073895931243896, - "learning_rate": 2.030239808577474e-05, - "loss": 0.5872, - "step": 201560 - }, - { - "epoch": 1.781944518113828, - "grad_norm": 21.227392196655273, - "learning_rate": 2.0300924698102867e-05, - "loss": 0.5558, - "step": 201570 - }, - { - "epoch": 1.7820329213741402, - "grad_norm": 1.1915196180343628, - "learning_rate": 2.0299451310431e-05, - "loss": 0.6041, - "step": 201580 - }, - { - "epoch": 1.7821213246344525, - "grad_norm": 1.779495120048523, - "learning_rate": 2.0297977922759127e-05, - "loss": 0.6486, - "step": 201590 - }, - { - "epoch": 1.7822097278947648, - "grad_norm": 2.8377809524536133, - "learning_rate": 2.0296504535087256e-05, - "loss": 0.5171, - "step": 201600 - }, - { - "epoch": 1.782298131155077, - "grad_norm": 1.758028268814087, - "learning_rate": 2.0295031147415384e-05, - "loss": 0.5747, - "step": 201610 - }, - { - "epoch": 1.782386534415389, - "grad_norm": 6.24250602722168, - "learning_rate": 2.0293557759743516e-05, - "loss": 0.4505, - "step": 201620 - }, - { - "epoch": 1.7824749376757016, - "grad_norm": 2.263643741607666, - "learning_rate": 2.0292084372071644e-05, - "loss": 0.5904, - "step": 201630 - }, - { - "epoch": 1.7825633409360138, - "grad_norm": 0.8728529214859009, - "learning_rate": 2.0290610984399773e-05, - "loss": 0.7096, - "step": 201640 - }, - { - "epoch": 1.7826517441963259, - "grad_norm": 4.5690460205078125, - "learning_rate": 2.0289137596727904e-05, - "loss": 0.6131, - "step": 201650 - }, - { - "epoch": 1.7827401474566382, - "grad_norm": 1.0904308557510376, - "learning_rate": 2.0287664209056033e-05, - "loss": 0.6517, - "step": 201660 - }, - { - "epoch": 1.7828285507169506, - "grad_norm": 2.5785434246063232, - "learning_rate": 2.028619082138416e-05, - "loss": 0.5849, - "step": 201670 - }, - { - "epoch": 1.7829169539772627, - "grad_norm": 16.86878204345703, - "learning_rate": 2.028471743371229e-05, - "loss": 0.4943, - "step": 201680 - }, - { - "epoch": 1.7830053572375748, - "grad_norm": 2.302427053451538, - "learning_rate": 2.028324404604042e-05, - "loss": 0.6317, - "step": 201690 - }, - { - "epoch": 1.7830937604978871, - "grad_norm": 2.126096248626709, - "learning_rate": 2.028177065836855e-05, - "loss": 0.5878, - "step": 201700 - }, - { - "epoch": 1.7831821637581995, - "grad_norm": 1.160621166229248, - "learning_rate": 2.0280297270696678e-05, - "loss": 0.6054, - "step": 201710 - }, - { - "epoch": 1.7832705670185116, - "grad_norm": 1.9716119766235352, - "learning_rate": 2.0278823883024806e-05, - "loss": 0.4748, - "step": 201720 - }, - { - "epoch": 1.7833589702788237, - "grad_norm": 2.5228166580200195, - "learning_rate": 2.0277350495352938e-05, - "loss": 0.5856, - "step": 201730 - }, - { - "epoch": 1.783447373539136, - "grad_norm": 1.8789089918136597, - "learning_rate": 2.0275877107681066e-05, - "loss": 0.6641, - "step": 201740 - }, - { - "epoch": 1.7835357767994484, - "grad_norm": 2.8637356758117676, - "learning_rate": 2.0274403720009195e-05, - "loss": 0.5891, - "step": 201750 - }, - { - "epoch": 1.7836241800597605, - "grad_norm": 8.53894329071045, - "learning_rate": 2.0272930332337326e-05, - "loss": 0.5297, - "step": 201760 - }, - { - "epoch": 1.7837125833200729, - "grad_norm": 2.2200565338134766, - "learning_rate": 2.0271456944665455e-05, - "loss": 0.5071, - "step": 201770 - }, - { - "epoch": 1.7838009865803852, - "grad_norm": 2.633755683898926, - "learning_rate": 2.0269983556993583e-05, - "loss": 0.643, - "step": 201780 - }, - { - "epoch": 1.7838893898406973, - "grad_norm": 1.5142741203308105, - "learning_rate": 2.026851016932171e-05, - "loss": 0.4403, - "step": 201790 - }, - { - "epoch": 1.7839777931010095, - "grad_norm": 2.4997575283050537, - "learning_rate": 2.0267036781649843e-05, - "loss": 0.5829, - "step": 201800 - }, - { - "epoch": 1.7840661963613218, - "grad_norm": 1.807714581489563, - "learning_rate": 2.026556339397797e-05, - "loss": 0.7249, - "step": 201810 - }, - { - "epoch": 1.7841545996216341, - "grad_norm": 1.2535957098007202, - "learning_rate": 2.02640900063061e-05, - "loss": 0.5603, - "step": 201820 - }, - { - "epoch": 1.7842430028819463, - "grad_norm": 2.556269407272339, - "learning_rate": 2.0262616618634232e-05, - "loss": 0.5667, - "step": 201830 - }, - { - "epoch": 1.7843314061422584, - "grad_norm": 3.98608660697937, - "learning_rate": 2.026114323096236e-05, - "loss": 0.5177, - "step": 201840 - }, - { - "epoch": 1.7844198094025707, - "grad_norm": 1.199278712272644, - "learning_rate": 2.025966984329049e-05, - "loss": 0.4912, - "step": 201850 - }, - { - "epoch": 1.784508212662883, - "grad_norm": 2.08693265914917, - "learning_rate": 2.0258196455618617e-05, - "loss": 0.6314, - "step": 201860 - }, - { - "epoch": 1.7845966159231952, - "grad_norm": 5.786932468414307, - "learning_rate": 2.025672306794675e-05, - "loss": 0.6169, - "step": 201870 - }, - { - "epoch": 1.7846850191835075, - "grad_norm": 1.3747122287750244, - "learning_rate": 2.0255249680274877e-05, - "loss": 0.6953, - "step": 201880 - }, - { - "epoch": 1.7847734224438199, - "grad_norm": 1.3487671613693237, - "learning_rate": 2.0253776292603005e-05, - "loss": 0.6751, - "step": 201890 - }, - { - "epoch": 1.784861825704132, - "grad_norm": 2.8167760372161865, - "learning_rate": 2.0252302904931134e-05, - "loss": 0.6142, - "step": 201900 - }, - { - "epoch": 1.784950228964444, - "grad_norm": 4.856353282928467, - "learning_rate": 2.0250829517259265e-05, - "loss": 0.6448, - "step": 201910 - }, - { - "epoch": 1.7850386322247564, - "grad_norm": 2.498176097869873, - "learning_rate": 2.0249356129587394e-05, - "loss": 0.6247, - "step": 201920 - }, - { - "epoch": 1.7851270354850688, - "grad_norm": 2.4965102672576904, - "learning_rate": 2.0247882741915522e-05, - "loss": 0.6207, - "step": 201930 - }, - { - "epoch": 1.785215438745381, - "grad_norm": 3.6828131675720215, - "learning_rate": 2.0246409354243654e-05, - "loss": 0.6407, - "step": 201940 - }, - { - "epoch": 1.785303842005693, - "grad_norm": 2.793348789215088, - "learning_rate": 2.0244935966571782e-05, - "loss": 0.6003, - "step": 201950 - }, - { - "epoch": 1.7853922452660054, - "grad_norm": 1.7601300477981567, - "learning_rate": 2.024346257889991e-05, - "loss": 0.5063, - "step": 201960 - }, - { - "epoch": 1.7854806485263177, - "grad_norm": 2.9499454498291016, - "learning_rate": 2.024198919122804e-05, - "loss": 0.5465, - "step": 201970 - }, - { - "epoch": 1.7855690517866298, - "grad_norm": 6.607210636138916, - "learning_rate": 2.024051580355617e-05, - "loss": 0.6002, - "step": 201980 - }, - { - "epoch": 1.7856574550469422, - "grad_norm": 1.4262440204620361, - "learning_rate": 2.02390424158843e-05, - "loss": 0.5102, - "step": 201990 - }, - { - "epoch": 1.7857458583072545, - "grad_norm": 1.0396995544433594, - "learning_rate": 2.0237569028212427e-05, - "loss": 0.6954, - "step": 202000 - }, - { - "epoch": 1.7858342615675666, - "grad_norm": 2.5385825634002686, - "learning_rate": 2.0236095640540556e-05, - "loss": 0.6196, - "step": 202010 - }, - { - "epoch": 1.7859226648278788, - "grad_norm": 9.166196823120117, - "learning_rate": 2.0234622252868687e-05, - "loss": 0.5676, - "step": 202020 - }, - { - "epoch": 1.786011068088191, - "grad_norm": 3.221381902694702, - "learning_rate": 2.0233148865196816e-05, - "loss": 0.5448, - "step": 202030 - }, - { - "epoch": 1.7860994713485034, - "grad_norm": 2.0953943729400635, - "learning_rate": 2.0231675477524944e-05, - "loss": 0.5464, - "step": 202040 - }, - { - "epoch": 1.7861878746088156, - "grad_norm": 6.100296497344971, - "learning_rate": 2.0230202089853076e-05, - "loss": 0.5461, - "step": 202050 - }, - { - "epoch": 1.7862762778691277, - "grad_norm": 2.7867844104766846, - "learning_rate": 2.0228728702181204e-05, - "loss": 0.5864, - "step": 202060 - }, - { - "epoch": 1.78636468112944, - "grad_norm": 1.0863789319992065, - "learning_rate": 2.0227255314509333e-05, - "loss": 0.5319, - "step": 202070 - }, - { - "epoch": 1.7864530843897524, - "grad_norm": 1.249638557434082, - "learning_rate": 2.022578192683746e-05, - "loss": 0.6176, - "step": 202080 - }, - { - "epoch": 1.7865414876500645, - "grad_norm": 3.3248021602630615, - "learning_rate": 2.0224308539165593e-05, - "loss": 0.6684, - "step": 202090 - }, - { - "epoch": 1.7866298909103768, - "grad_norm": 2.549376964569092, - "learning_rate": 2.022283515149372e-05, - "loss": 0.4447, - "step": 202100 - }, - { - "epoch": 1.7867182941706892, - "grad_norm": 2.5520517826080322, - "learning_rate": 2.022136176382185e-05, - "loss": 0.5984, - "step": 202110 - }, - { - "epoch": 1.7868066974310013, - "grad_norm": 3.3038833141326904, - "learning_rate": 2.021988837614998e-05, - "loss": 0.6856, - "step": 202120 - }, - { - "epoch": 1.7868951006913134, - "grad_norm": 4.299644947052002, - "learning_rate": 2.021841498847811e-05, - "loss": 0.5675, - "step": 202130 - }, - { - "epoch": 1.7869835039516258, - "grad_norm": 4.799663543701172, - "learning_rate": 2.0216941600806238e-05, - "loss": 0.6688, - "step": 202140 - }, - { - "epoch": 1.787071907211938, - "grad_norm": 2.128201961517334, - "learning_rate": 2.0215468213134366e-05, - "loss": 0.6304, - "step": 202150 - }, - { - "epoch": 1.7871603104722502, - "grad_norm": 8.448138236999512, - "learning_rate": 2.0213994825462498e-05, - "loss": 0.5856, - "step": 202160 - }, - { - "epoch": 1.7872487137325623, - "grad_norm": 1.518473744392395, - "learning_rate": 2.0212521437790626e-05, - "loss": 0.4824, - "step": 202170 - }, - { - "epoch": 1.7873371169928747, - "grad_norm": 3.5119426250457764, - "learning_rate": 2.0211048050118755e-05, - "loss": 0.6747, - "step": 202180 - }, - { - "epoch": 1.787425520253187, - "grad_norm": 5.362548828125, - "learning_rate": 2.0209574662446883e-05, - "loss": 0.6061, - "step": 202190 - }, - { - "epoch": 1.7875139235134991, - "grad_norm": 2.3434488773345947, - "learning_rate": 2.0208101274775015e-05, - "loss": 0.5408, - "step": 202200 - }, - { - "epoch": 1.7876023267738113, - "grad_norm": 2.1973605155944824, - "learning_rate": 2.0206627887103143e-05, - "loss": 0.5496, - "step": 202210 - }, - { - "epoch": 1.7876907300341238, - "grad_norm": 5.755651950836182, - "learning_rate": 2.020515449943127e-05, - "loss": 0.5231, - "step": 202220 - }, - { - "epoch": 1.787779133294436, - "grad_norm": 16.390277862548828, - "learning_rate": 2.0203681111759403e-05, - "loss": 0.6467, - "step": 202230 - }, - { - "epoch": 1.787867536554748, - "grad_norm": 1.874927282333374, - "learning_rate": 2.020220772408753e-05, - "loss": 0.6742, - "step": 202240 - }, - { - "epoch": 1.7879559398150604, - "grad_norm": 5.835158824920654, - "learning_rate": 2.020073433641566e-05, - "loss": 0.5809, - "step": 202250 - }, - { - "epoch": 1.7880443430753727, - "grad_norm": 1.696328043937683, - "learning_rate": 2.0199260948743788e-05, - "loss": 0.5666, - "step": 202260 - }, - { - "epoch": 1.7881327463356849, - "grad_norm": 5.258299827575684, - "learning_rate": 2.019778756107192e-05, - "loss": 0.5327, - "step": 202270 - }, - { - "epoch": 1.788221149595997, - "grad_norm": 1.2496052980422974, - "learning_rate": 2.0196314173400048e-05, - "loss": 0.5961, - "step": 202280 - }, - { - "epoch": 1.7883095528563093, - "grad_norm": 1.8631751537322998, - "learning_rate": 2.0194840785728177e-05, - "loss": 0.5465, - "step": 202290 - }, - { - "epoch": 1.7883979561166217, - "grad_norm": 1.9512073993682861, - "learning_rate": 2.019336739805631e-05, - "loss": 0.6174, - "step": 202300 - }, - { - "epoch": 1.7884863593769338, - "grad_norm": 2.4947447776794434, - "learning_rate": 2.0191894010384437e-05, - "loss": 0.648, - "step": 202310 - }, - { - "epoch": 1.788574762637246, - "grad_norm": 1.7648309469223022, - "learning_rate": 2.0190420622712565e-05, - "loss": 0.5216, - "step": 202320 - }, - { - "epoch": 1.7886631658975582, - "grad_norm": 2.477572441101074, - "learning_rate": 2.0188947235040693e-05, - "loss": 0.6343, - "step": 202330 - }, - { - "epoch": 1.7887515691578706, - "grad_norm": 3.5414538383483887, - "learning_rate": 2.0187473847368825e-05, - "loss": 0.4303, - "step": 202340 - }, - { - "epoch": 1.7888399724181827, - "grad_norm": 2.2090277671813965, - "learning_rate": 2.0186000459696954e-05, - "loss": 0.5601, - "step": 202350 - }, - { - "epoch": 1.788928375678495, - "grad_norm": 1.491808295249939, - "learning_rate": 2.0184527072025082e-05, - "loss": 0.6144, - "step": 202360 - }, - { - "epoch": 1.7890167789388074, - "grad_norm": 1.3663378953933716, - "learning_rate": 2.0183053684353214e-05, - "loss": 0.435, - "step": 202370 - }, - { - "epoch": 1.7891051821991195, - "grad_norm": 12.870560646057129, - "learning_rate": 2.0181580296681342e-05, - "loss": 0.4637, - "step": 202380 - }, - { - "epoch": 1.7891935854594316, - "grad_norm": 3.9255971908569336, - "learning_rate": 2.018010690900947e-05, - "loss": 0.5703, - "step": 202390 - }, - { - "epoch": 1.789281988719744, - "grad_norm": 5.678790092468262, - "learning_rate": 2.0178633521337602e-05, - "loss": 0.6829, - "step": 202400 - }, - { - "epoch": 1.7893703919800563, - "grad_norm": 3.1971166133880615, - "learning_rate": 2.017716013366573e-05, - "loss": 0.5526, - "step": 202410 - }, - { - "epoch": 1.7894587952403684, - "grad_norm": 3.0424091815948486, - "learning_rate": 2.017568674599386e-05, - "loss": 0.5215, - "step": 202420 - }, - { - "epoch": 1.7895471985006806, - "grad_norm": 2.290457248687744, - "learning_rate": 2.017421335832199e-05, - "loss": 0.4215, - "step": 202430 - }, - { - "epoch": 1.789635601760993, - "grad_norm": 4.569860935211182, - "learning_rate": 2.017273997065012e-05, - "loss": 0.6219, - "step": 202440 - }, - { - "epoch": 1.7897240050213052, - "grad_norm": 3.1228394508361816, - "learning_rate": 2.0171266582978247e-05, - "loss": 0.478, - "step": 202450 - }, - { - "epoch": 1.7898124082816174, - "grad_norm": 2.4187965393066406, - "learning_rate": 2.016979319530638e-05, - "loss": 0.4837, - "step": 202460 - }, - { - "epoch": 1.7899008115419297, - "grad_norm": 2.5301291942596436, - "learning_rate": 2.0168319807634507e-05, - "loss": 0.6361, - "step": 202470 - }, - { - "epoch": 1.789989214802242, - "grad_norm": 3.850473642349243, - "learning_rate": 2.0166846419962636e-05, - "loss": 0.6645, - "step": 202480 - }, - { - "epoch": 1.7900776180625542, - "grad_norm": 6.328955173492432, - "learning_rate": 2.0165373032290767e-05, - "loss": 0.6638, - "step": 202490 - }, - { - "epoch": 1.7901660213228663, - "grad_norm": 2.0801162719726562, - "learning_rate": 2.0163899644618896e-05, - "loss": 0.7099, - "step": 202500 - }, - { - "epoch": 1.7902544245831786, - "grad_norm": 40.06092834472656, - "learning_rate": 2.0162426256947024e-05, - "loss": 0.5311, - "step": 202510 - }, - { - "epoch": 1.790342827843491, - "grad_norm": 8.495887756347656, - "learning_rate": 2.0160952869275156e-05, - "loss": 0.6118, - "step": 202520 - }, - { - "epoch": 1.790431231103803, - "grad_norm": 5.132534980773926, - "learning_rate": 2.0159479481603284e-05, - "loss": 0.6539, - "step": 202530 - }, - { - "epoch": 1.7905196343641152, - "grad_norm": 2.4099485874176025, - "learning_rate": 2.0158006093931413e-05, - "loss": 0.67, - "step": 202540 - }, - { - "epoch": 1.7906080376244276, - "grad_norm": 7.6244049072265625, - "learning_rate": 2.015653270625954e-05, - "loss": 0.6178, - "step": 202550 - }, - { - "epoch": 1.79069644088474, - "grad_norm": 3.099921703338623, - "learning_rate": 2.0155059318587673e-05, - "loss": 0.5688, - "step": 202560 - }, - { - "epoch": 1.790784844145052, - "grad_norm": 2.667494058609009, - "learning_rate": 2.01535859309158e-05, - "loss": 0.6662, - "step": 202570 - }, - { - "epoch": 1.7908732474053644, - "grad_norm": 7.829151153564453, - "learning_rate": 2.015211254324393e-05, - "loss": 0.5814, - "step": 202580 - }, - { - "epoch": 1.7909616506656767, - "grad_norm": 3.4630019664764404, - "learning_rate": 2.015063915557206e-05, - "loss": 0.5265, - "step": 202590 - }, - { - "epoch": 1.7910500539259888, - "grad_norm": 1.9397492408752441, - "learning_rate": 2.014916576790019e-05, - "loss": 0.652, - "step": 202600 - }, - { - "epoch": 1.791138457186301, - "grad_norm": 2.0695536136627197, - "learning_rate": 2.0147692380228318e-05, - "loss": 0.6052, - "step": 202610 - }, - { - "epoch": 1.7912268604466133, - "grad_norm": 1.3530495166778564, - "learning_rate": 2.0146218992556446e-05, - "loss": 0.544, - "step": 202620 - }, - { - "epoch": 1.7913152637069256, - "grad_norm": 3.372593402862549, - "learning_rate": 2.0144745604884578e-05, - "loss": 0.4413, - "step": 202630 - }, - { - "epoch": 1.7914036669672377, - "grad_norm": 10.179778099060059, - "learning_rate": 2.0143272217212706e-05, - "loss": 0.6925, - "step": 202640 - }, - { - "epoch": 1.7914920702275499, - "grad_norm": 1.1542190313339233, - "learning_rate": 2.0141798829540835e-05, - "loss": 0.5617, - "step": 202650 - }, - { - "epoch": 1.7915804734878622, - "grad_norm": 8.816036224365234, - "learning_rate": 2.0140325441868963e-05, - "loss": 0.6547, - "step": 202660 - }, - { - "epoch": 1.7916688767481745, - "grad_norm": 1.5335596799850464, - "learning_rate": 2.0138852054197095e-05, - "loss": 0.472, - "step": 202670 - }, - { - "epoch": 1.7917572800084867, - "grad_norm": 2.918468475341797, - "learning_rate": 2.0137378666525223e-05, - "loss": 0.6504, - "step": 202680 - }, - { - "epoch": 1.791845683268799, - "grad_norm": 4.687645435333252, - "learning_rate": 2.013590527885335e-05, - "loss": 0.6655, - "step": 202690 - }, - { - "epoch": 1.7919340865291113, - "grad_norm": 2.681749105453491, - "learning_rate": 2.0134431891181483e-05, - "loss": 0.5321, - "step": 202700 - }, - { - "epoch": 1.7920224897894235, - "grad_norm": 0.9618099927902222, - "learning_rate": 2.013295850350961e-05, - "loss": 0.6792, - "step": 202710 - }, - { - "epoch": 1.7921108930497356, - "grad_norm": 1.7380198240280151, - "learning_rate": 2.013148511583774e-05, - "loss": 0.6196, - "step": 202720 - }, - { - "epoch": 1.792199296310048, - "grad_norm": 2.081829786300659, - "learning_rate": 2.0130011728165868e-05, - "loss": 0.5425, - "step": 202730 - }, - { - "epoch": 1.7922876995703603, - "grad_norm": 1.2693742513656616, - "learning_rate": 2.0128538340494e-05, - "loss": 0.5532, - "step": 202740 - }, - { - "epoch": 1.7923761028306724, - "grad_norm": 3.4703986644744873, - "learning_rate": 2.012706495282213e-05, - "loss": 0.5527, - "step": 202750 - }, - { - "epoch": 1.7924645060909845, - "grad_norm": 8.27893352508545, - "learning_rate": 2.0125591565150257e-05, - "loss": 0.6402, - "step": 202760 - }, - { - "epoch": 1.7925529093512969, - "grad_norm": 1.1515133380889893, - "learning_rate": 2.012411817747839e-05, - "loss": 0.5985, - "step": 202770 - }, - { - "epoch": 1.7926413126116092, - "grad_norm": 1.263790488243103, - "learning_rate": 2.0122644789806517e-05, - "loss": 0.5517, - "step": 202780 - }, - { - "epoch": 1.7927297158719213, - "grad_norm": 5.390106678009033, - "learning_rate": 2.0121171402134645e-05, - "loss": 0.6238, - "step": 202790 - }, - { - "epoch": 1.7928181191322334, - "grad_norm": 4.11129093170166, - "learning_rate": 2.0119698014462774e-05, - "loss": 0.6483, - "step": 202800 - }, - { - "epoch": 1.792906522392546, - "grad_norm": 2.6334662437438965, - "learning_rate": 2.0118224626790905e-05, - "loss": 0.6785, - "step": 202810 - }, - { - "epoch": 1.7929949256528581, - "grad_norm": 2.90444278717041, - "learning_rate": 2.0116751239119034e-05, - "loss": 0.6102, - "step": 202820 - }, - { - "epoch": 1.7930833289131702, - "grad_norm": 1.446006178855896, - "learning_rate": 2.0115277851447162e-05, - "loss": 0.5848, - "step": 202830 - }, - { - "epoch": 1.7931717321734826, - "grad_norm": 2.223749876022339, - "learning_rate": 2.011380446377529e-05, - "loss": 0.5806, - "step": 202840 - }, - { - "epoch": 1.793260135433795, - "grad_norm": 1.503097414970398, - "learning_rate": 2.0112331076103422e-05, - "loss": 0.5039, - "step": 202850 - }, - { - "epoch": 1.793348538694107, - "grad_norm": 2.7936577796936035, - "learning_rate": 2.011085768843155e-05, - "loss": 0.6021, - "step": 202860 - }, - { - "epoch": 1.7934369419544192, - "grad_norm": 1.772740125656128, - "learning_rate": 2.010938430075968e-05, - "loss": 0.7105, - "step": 202870 - }, - { - "epoch": 1.7935253452147315, - "grad_norm": 1.6509367227554321, - "learning_rate": 2.010791091308781e-05, - "loss": 0.5605, - "step": 202880 - }, - { - "epoch": 1.7936137484750438, - "grad_norm": 1.566184639930725, - "learning_rate": 2.010643752541594e-05, - "loss": 0.55, - "step": 202890 - }, - { - "epoch": 1.793702151735356, - "grad_norm": 3.6672322750091553, - "learning_rate": 2.0104964137744067e-05, - "loss": 0.688, - "step": 202900 - }, - { - "epoch": 1.793790554995668, - "grad_norm": 3.762032985687256, - "learning_rate": 2.0103490750072196e-05, - "loss": 0.5696, - "step": 202910 - }, - { - "epoch": 1.7938789582559804, - "grad_norm": 8.605866432189941, - "learning_rate": 2.0102017362400327e-05, - "loss": 0.6872, - "step": 202920 - }, - { - "epoch": 1.7939673615162928, - "grad_norm": 2.1437554359436035, - "learning_rate": 2.0100543974728456e-05, - "loss": 0.5188, - "step": 202930 - }, - { - "epoch": 1.7940557647766049, - "grad_norm": 8.493627548217773, - "learning_rate": 2.0099070587056584e-05, - "loss": 0.6031, - "step": 202940 - }, - { - "epoch": 1.7941441680369172, - "grad_norm": 12.949837684631348, - "learning_rate": 2.0097597199384716e-05, - "loss": 0.555, - "step": 202950 - }, - { - "epoch": 1.7942325712972296, - "grad_norm": 5.7469353675842285, - "learning_rate": 2.0096123811712844e-05, - "loss": 0.5302, - "step": 202960 - }, - { - "epoch": 1.7943209745575417, - "grad_norm": 2.5767934322357178, - "learning_rate": 2.0094650424040972e-05, - "loss": 0.5887, - "step": 202970 - }, - { - "epoch": 1.7944093778178538, - "grad_norm": 1.2326583862304688, - "learning_rate": 2.00931770363691e-05, - "loss": 0.553, - "step": 202980 - }, - { - "epoch": 1.7944977810781662, - "grad_norm": 5.684199810028076, - "learning_rate": 2.0091703648697233e-05, - "loss": 0.5481, - "step": 202990 - }, - { - "epoch": 1.7945861843384785, - "grad_norm": 3.1634457111358643, - "learning_rate": 2.009023026102536e-05, - "loss": 0.6541, - "step": 203000 - }, - { - "epoch": 1.7946745875987906, - "grad_norm": 1.2563214302062988, - "learning_rate": 2.008875687335349e-05, - "loss": 0.6606, - "step": 203010 - }, - { - "epoch": 1.7947629908591027, - "grad_norm": 13.21400260925293, - "learning_rate": 2.0087283485681618e-05, - "loss": 0.6453, - "step": 203020 - }, - { - "epoch": 1.794851394119415, - "grad_norm": 23.091981887817383, - "learning_rate": 2.008581009800975e-05, - "loss": 0.5445, - "step": 203030 - }, - { - "epoch": 1.7949397973797274, - "grad_norm": 2.3949601650238037, - "learning_rate": 2.0084336710337878e-05, - "loss": 0.7609, - "step": 203040 - }, - { - "epoch": 1.7950282006400395, - "grad_norm": 1.2793152332305908, - "learning_rate": 2.0082863322666006e-05, - "loss": 0.4889, - "step": 203050 - }, - { - "epoch": 1.7951166039003519, - "grad_norm": 6.398225784301758, - "learning_rate": 2.0081389934994138e-05, - "loss": 0.6512, - "step": 203060 - }, - { - "epoch": 1.7952050071606642, - "grad_norm": 3.3708279132843018, - "learning_rate": 2.0079916547322266e-05, - "loss": 0.4781, - "step": 203070 - }, - { - "epoch": 1.7952934104209763, - "grad_norm": 0.9596735239028931, - "learning_rate": 2.0078443159650395e-05, - "loss": 0.5738, - "step": 203080 - }, - { - "epoch": 1.7953818136812885, - "grad_norm": 3.4195425510406494, - "learning_rate": 2.0076969771978523e-05, - "loss": 0.6649, - "step": 203090 - }, - { - "epoch": 1.7954702169416008, - "grad_norm": 2.021829128265381, - "learning_rate": 2.0075496384306655e-05, - "loss": 0.5668, - "step": 203100 - }, - { - "epoch": 1.7955586202019131, - "grad_norm": 2.659977912902832, - "learning_rate": 2.0074022996634783e-05, - "loss": 0.6641, - "step": 203110 - }, - { - "epoch": 1.7956470234622253, - "grad_norm": 7.592244625091553, - "learning_rate": 2.007254960896291e-05, - "loss": 0.6285, - "step": 203120 - }, - { - "epoch": 1.7957354267225374, - "grad_norm": 1.3313415050506592, - "learning_rate": 2.007107622129104e-05, - "loss": 0.469, - "step": 203130 - }, - { - "epoch": 1.7958238299828497, - "grad_norm": 11.043901443481445, - "learning_rate": 2.006960283361917e-05, - "loss": 0.5971, - "step": 203140 - }, - { - "epoch": 1.795912233243162, - "grad_norm": 1.4664943218231201, - "learning_rate": 2.00681294459473e-05, - "loss": 0.544, - "step": 203150 - }, - { - "epoch": 1.7960006365034742, - "grad_norm": 1.6054465770721436, - "learning_rate": 2.0066656058275428e-05, - "loss": 0.666, - "step": 203160 - }, - { - "epoch": 1.7960890397637865, - "grad_norm": 6.42251443862915, - "learning_rate": 2.006518267060356e-05, - "loss": 0.5357, - "step": 203170 - }, - { - "epoch": 1.7961774430240989, - "grad_norm": 5.198081970214844, - "learning_rate": 2.0063709282931688e-05, - "loss": 0.6537, - "step": 203180 - }, - { - "epoch": 1.796265846284411, - "grad_norm": 1.8932801485061646, - "learning_rate": 2.0062235895259817e-05, - "loss": 0.6992, - "step": 203190 - }, - { - "epoch": 1.7963542495447231, - "grad_norm": 2.0405161380767822, - "learning_rate": 2.0060762507587945e-05, - "loss": 0.6569, - "step": 203200 - }, - { - "epoch": 1.7964426528050355, - "grad_norm": 1.3182144165039062, - "learning_rate": 2.0059289119916077e-05, - "loss": 0.4165, - "step": 203210 - }, - { - "epoch": 1.7965310560653478, - "grad_norm": 3.6779301166534424, - "learning_rate": 2.0057815732244205e-05, - "loss": 0.6144, - "step": 203220 - }, - { - "epoch": 1.79661945932566, - "grad_norm": 2.590069055557251, - "learning_rate": 2.0056342344572333e-05, - "loss": 0.5455, - "step": 203230 - }, - { - "epoch": 1.796707862585972, - "grad_norm": 5.057015895843506, - "learning_rate": 2.0054868956900465e-05, - "loss": 0.5215, - "step": 203240 - }, - { - "epoch": 1.7967962658462844, - "grad_norm": 4.856685161590576, - "learning_rate": 2.0053395569228593e-05, - "loss": 0.8774, - "step": 203250 - }, - { - "epoch": 1.7968846691065967, - "grad_norm": 3.5035128593444824, - "learning_rate": 2.0051922181556722e-05, - "loss": 0.4991, - "step": 203260 - }, - { - "epoch": 1.7969730723669088, - "grad_norm": 2.6281065940856934, - "learning_rate": 2.005044879388485e-05, - "loss": 0.5703, - "step": 203270 - }, - { - "epoch": 1.7970614756272212, - "grad_norm": 3.6252622604370117, - "learning_rate": 2.0048975406212982e-05, - "loss": 0.6669, - "step": 203280 - }, - { - "epoch": 1.7971498788875335, - "grad_norm": 4.038813591003418, - "learning_rate": 2.004750201854111e-05, - "loss": 0.6555, - "step": 203290 - }, - { - "epoch": 1.7972382821478456, - "grad_norm": 1.6726902723312378, - "learning_rate": 2.004602863086924e-05, - "loss": 0.5421, - "step": 203300 - }, - { - "epoch": 1.7973266854081578, - "grad_norm": 8.145662307739258, - "learning_rate": 2.004455524319737e-05, - "loss": 0.6577, - "step": 203310 - }, - { - "epoch": 1.79741508866847, - "grad_norm": 1.3114120960235596, - "learning_rate": 2.00430818555255e-05, - "loss": 0.5077, - "step": 203320 - }, - { - "epoch": 1.7975034919287824, - "grad_norm": 3.426360845565796, - "learning_rate": 2.0041608467853627e-05, - "loss": 0.6362, - "step": 203330 - }, - { - "epoch": 1.7975918951890946, - "grad_norm": 1.7572712898254395, - "learning_rate": 2.004013508018176e-05, - "loss": 0.568, - "step": 203340 - }, - { - "epoch": 1.7976802984494067, - "grad_norm": 2.663112163543701, - "learning_rate": 2.0038661692509887e-05, - "loss": 0.5625, - "step": 203350 - }, - { - "epoch": 1.797768701709719, - "grad_norm": 3.7881641387939453, - "learning_rate": 2.0037188304838016e-05, - "loss": 0.6323, - "step": 203360 - }, - { - "epoch": 1.7978571049700314, - "grad_norm": 2.837618350982666, - "learning_rate": 2.0035714917166147e-05, - "loss": 0.6865, - "step": 203370 - }, - { - "epoch": 1.7979455082303435, - "grad_norm": 2.7353949546813965, - "learning_rate": 2.0034241529494276e-05, - "loss": 0.6266, - "step": 203380 - }, - { - "epoch": 1.7980339114906556, - "grad_norm": 3.1415469646453857, - "learning_rate": 2.0032768141822404e-05, - "loss": 0.6028, - "step": 203390 - }, - { - "epoch": 1.7981223147509682, - "grad_norm": 3.9146194458007812, - "learning_rate": 2.0031294754150536e-05, - "loss": 0.493, - "step": 203400 - }, - { - "epoch": 1.7982107180112803, - "grad_norm": 0.9928793907165527, - "learning_rate": 2.0029821366478664e-05, - "loss": 0.5807, - "step": 203410 - }, - { - "epoch": 1.7982991212715924, - "grad_norm": 2.017794132232666, - "learning_rate": 2.0028347978806792e-05, - "loss": 0.5576, - "step": 203420 - }, - { - "epoch": 1.7983875245319048, - "grad_norm": 9.675992012023926, - "learning_rate": 2.0026874591134924e-05, - "loss": 0.5098, - "step": 203430 - }, - { - "epoch": 1.798475927792217, - "grad_norm": 0.9910358786582947, - "learning_rate": 2.0025401203463053e-05, - "loss": 0.6325, - "step": 203440 - }, - { - "epoch": 1.7985643310525292, - "grad_norm": 3.145650625228882, - "learning_rate": 2.002392781579118e-05, - "loss": 0.5242, - "step": 203450 - }, - { - "epoch": 1.7986527343128413, - "grad_norm": 13.919158935546875, - "learning_rate": 2.0022454428119313e-05, - "loss": 0.6918, - "step": 203460 - }, - { - "epoch": 1.7987411375731537, - "grad_norm": 6.881532192230225, - "learning_rate": 2.002098104044744e-05, - "loss": 0.6199, - "step": 203470 - }, - { - "epoch": 1.798829540833466, - "grad_norm": 1.532932996749878, - "learning_rate": 2.001950765277557e-05, - "loss": 0.5106, - "step": 203480 - }, - { - "epoch": 1.7989179440937781, - "grad_norm": 3.1638576984405518, - "learning_rate": 2.0018034265103698e-05, - "loss": 0.5734, - "step": 203490 - }, - { - "epoch": 1.7990063473540903, - "grad_norm": 1.9062272310256958, - "learning_rate": 2.001656087743183e-05, - "loss": 0.7303, - "step": 203500 - }, - { - "epoch": 1.7990947506144026, - "grad_norm": 2.4404211044311523, - "learning_rate": 2.0015087489759958e-05, - "loss": 0.7045, - "step": 203510 - }, - { - "epoch": 1.799183153874715, - "grad_norm": 1.1322623491287231, - "learning_rate": 2.0013614102088086e-05, - "loss": 0.7148, - "step": 203520 - }, - { - "epoch": 1.799271557135027, - "grad_norm": 3.5437192916870117, - "learning_rate": 2.0012140714416218e-05, - "loss": 0.4968, - "step": 203530 - }, - { - "epoch": 1.7993599603953394, - "grad_norm": 2.8896358013153076, - "learning_rate": 2.0010667326744346e-05, - "loss": 0.5137, - "step": 203540 - }, - { - "epoch": 1.7994483636556518, - "grad_norm": 3.1829965114593506, - "learning_rate": 2.0009193939072475e-05, - "loss": 0.6372, - "step": 203550 - }, - { - "epoch": 1.7995367669159639, - "grad_norm": 4.139323711395264, - "learning_rate": 2.0007720551400603e-05, - "loss": 0.6396, - "step": 203560 - }, - { - "epoch": 1.799625170176276, - "grad_norm": 1.7024682760238647, - "learning_rate": 2.0006247163728735e-05, - "loss": 0.5441, - "step": 203570 - }, - { - "epoch": 1.7997135734365883, - "grad_norm": 4.200142860412598, - "learning_rate": 2.0004773776056863e-05, - "loss": 0.6474, - "step": 203580 - }, - { - "epoch": 1.7998019766969007, - "grad_norm": 7.579667091369629, - "learning_rate": 2.000330038838499e-05, - "loss": 0.6004, - "step": 203590 - }, - { - "epoch": 1.7998903799572128, - "grad_norm": 1.4747802019119263, - "learning_rate": 2.000182700071312e-05, - "loss": 0.6946, - "step": 203600 - }, - { - "epoch": 1.799978783217525, - "grad_norm": 1.8386363983154297, - "learning_rate": 2.000035361304125e-05, - "loss": 0.63, - "step": 203610 - }, - { - "epoch": 1.8000671864778373, - "grad_norm": 1.8821145296096802, - "learning_rate": 1.999888022536938e-05, - "loss": 0.4379, - "step": 203620 - }, - { - "epoch": 1.8001555897381496, - "grad_norm": 0.8716601133346558, - "learning_rate": 1.9997406837697508e-05, - "loss": 0.5653, - "step": 203630 - }, - { - "epoch": 1.8002439929984617, - "grad_norm": 6.133352756500244, - "learning_rate": 1.999593345002564e-05, - "loss": 0.6158, - "step": 203640 - }, - { - "epoch": 1.800332396258774, - "grad_norm": 2.3612663745880127, - "learning_rate": 1.999446006235377e-05, - "loss": 0.6145, - "step": 203650 - }, - { - "epoch": 1.8004207995190864, - "grad_norm": 7.695614814758301, - "learning_rate": 1.9992986674681897e-05, - "loss": 0.7172, - "step": 203660 - }, - { - "epoch": 1.8005092027793985, - "grad_norm": 2.7973742485046387, - "learning_rate": 1.9991513287010025e-05, - "loss": 0.5287, - "step": 203670 - }, - { - "epoch": 1.8005976060397106, - "grad_norm": 2.371340036392212, - "learning_rate": 1.9990039899338157e-05, - "loss": 0.5255, - "step": 203680 - }, - { - "epoch": 1.800686009300023, - "grad_norm": 1.6354972124099731, - "learning_rate": 1.9988566511666285e-05, - "loss": 0.5584, - "step": 203690 - }, - { - "epoch": 1.8007744125603353, - "grad_norm": 1.2594465017318726, - "learning_rate": 1.9987093123994413e-05, - "loss": 0.6533, - "step": 203700 - }, - { - "epoch": 1.8008628158206474, - "grad_norm": 6.658586025238037, - "learning_rate": 1.9985619736322545e-05, - "loss": 0.5343, - "step": 203710 - }, - { - "epoch": 1.8009512190809596, - "grad_norm": 21.469985961914062, - "learning_rate": 1.9984146348650674e-05, - "loss": 0.6156, - "step": 203720 - }, - { - "epoch": 1.801039622341272, - "grad_norm": 2.947948455810547, - "learning_rate": 1.9982672960978802e-05, - "loss": 0.594, - "step": 203730 - }, - { - "epoch": 1.8011280256015842, - "grad_norm": 10.709976196289062, - "learning_rate": 1.998119957330693e-05, - "loss": 0.5257, - "step": 203740 - }, - { - "epoch": 1.8012164288618964, - "grad_norm": 4.042881488800049, - "learning_rate": 1.9979726185635062e-05, - "loss": 0.7276, - "step": 203750 - }, - { - "epoch": 1.8013048321222087, - "grad_norm": 10.027393341064453, - "learning_rate": 1.997825279796319e-05, - "loss": 0.4674, - "step": 203760 - }, - { - "epoch": 1.801393235382521, - "grad_norm": 3.7841732501983643, - "learning_rate": 1.997677941029132e-05, - "loss": 0.5067, - "step": 203770 - }, - { - "epoch": 1.8014816386428332, - "grad_norm": 6.647843837738037, - "learning_rate": 1.9975306022619447e-05, - "loss": 0.6248, - "step": 203780 - }, - { - "epoch": 1.8015700419031453, - "grad_norm": 1.9358712434768677, - "learning_rate": 1.997383263494758e-05, - "loss": 0.5107, - "step": 203790 - }, - { - "epoch": 1.8016584451634576, - "grad_norm": 5.811150074005127, - "learning_rate": 1.9972359247275707e-05, - "loss": 0.5592, - "step": 203800 - }, - { - "epoch": 1.80174684842377, - "grad_norm": 2.651205539703369, - "learning_rate": 1.9970885859603836e-05, - "loss": 0.5803, - "step": 203810 - }, - { - "epoch": 1.801835251684082, - "grad_norm": 8.953720092773438, - "learning_rate": 1.9969412471931967e-05, - "loss": 0.8144, - "step": 203820 - }, - { - "epoch": 1.8019236549443942, - "grad_norm": 4.853878498077393, - "learning_rate": 1.9967939084260096e-05, - "loss": 0.5215, - "step": 203830 - }, - { - "epoch": 1.8020120582047066, - "grad_norm": 2.547917604446411, - "learning_rate": 1.9966465696588224e-05, - "loss": 0.6414, - "step": 203840 - }, - { - "epoch": 1.802100461465019, - "grad_norm": 1.9385179281234741, - "learning_rate": 1.9964992308916352e-05, - "loss": 0.6466, - "step": 203850 - }, - { - "epoch": 1.802188864725331, - "grad_norm": 1.986112117767334, - "learning_rate": 1.9963518921244484e-05, - "loss": 0.6, - "step": 203860 - }, - { - "epoch": 1.8022772679856434, - "grad_norm": 10.715702056884766, - "learning_rate": 1.9962045533572612e-05, - "loss": 0.5021, - "step": 203870 - }, - { - "epoch": 1.8023656712459557, - "grad_norm": 1.070197343826294, - "learning_rate": 1.996057214590074e-05, - "loss": 0.5645, - "step": 203880 - }, - { - "epoch": 1.8024540745062678, - "grad_norm": 2.2143959999084473, - "learning_rate": 1.9959098758228873e-05, - "loss": 0.6572, - "step": 203890 - }, - { - "epoch": 1.80254247776658, - "grad_norm": 4.326207160949707, - "learning_rate": 1.9957625370557e-05, - "loss": 0.5749, - "step": 203900 - }, - { - "epoch": 1.8026308810268923, - "grad_norm": 4.877932548522949, - "learning_rate": 1.995615198288513e-05, - "loss": 0.582, - "step": 203910 - }, - { - "epoch": 1.8027192842872046, - "grad_norm": 2.591742753982544, - "learning_rate": 1.9954678595213258e-05, - "loss": 0.5947, - "step": 203920 - }, - { - "epoch": 1.8028076875475167, - "grad_norm": 3.0118486881256104, - "learning_rate": 1.995320520754139e-05, - "loss": 0.5979, - "step": 203930 - }, - { - "epoch": 1.8028960908078289, - "grad_norm": 11.166177749633789, - "learning_rate": 1.9951731819869518e-05, - "loss": 0.6252, - "step": 203940 - }, - { - "epoch": 1.8029844940681412, - "grad_norm": 8.568842887878418, - "learning_rate": 1.9950258432197646e-05, - "loss": 0.6038, - "step": 203950 - }, - { - "epoch": 1.8030728973284535, - "grad_norm": 3.4849417209625244, - "learning_rate": 1.9948785044525774e-05, - "loss": 0.5048, - "step": 203960 - }, - { - "epoch": 1.8031613005887657, - "grad_norm": 3.739942789077759, - "learning_rate": 1.9947311656853906e-05, - "loss": 0.5712, - "step": 203970 - }, - { - "epoch": 1.8032497038490778, - "grad_norm": 2.258883476257324, - "learning_rate": 1.9945838269182034e-05, - "loss": 0.6069, - "step": 203980 - }, - { - "epoch": 1.8033381071093904, - "grad_norm": 1.2948611974716187, - "learning_rate": 1.9944364881510163e-05, - "loss": 0.5668, - "step": 203990 - }, - { - "epoch": 1.8034265103697025, - "grad_norm": 3.449302911758423, - "learning_rate": 1.9942891493838295e-05, - "loss": 0.6062, - "step": 204000 - }, - { - "epoch": 1.8035149136300146, - "grad_norm": 9.215113639831543, - "learning_rate": 1.9941418106166423e-05, - "loss": 0.562, - "step": 204010 - }, - { - "epoch": 1.803603316890327, - "grad_norm": 1.5984588861465454, - "learning_rate": 1.993994471849455e-05, - "loss": 0.64, - "step": 204020 - }, - { - "epoch": 1.8036917201506393, - "grad_norm": 1.348873496055603, - "learning_rate": 1.993847133082268e-05, - "loss": 0.5534, - "step": 204030 - }, - { - "epoch": 1.8037801234109514, - "grad_norm": 10.261507034301758, - "learning_rate": 1.993699794315081e-05, - "loss": 0.4839, - "step": 204040 - }, - { - "epoch": 1.8038685266712635, - "grad_norm": 5.606393814086914, - "learning_rate": 1.993552455547894e-05, - "loss": 0.5313, - "step": 204050 - }, - { - "epoch": 1.8039569299315759, - "grad_norm": 3.2035329341888428, - "learning_rate": 1.9934051167807068e-05, - "loss": 0.6767, - "step": 204060 - }, - { - "epoch": 1.8040453331918882, - "grad_norm": 2.819847822189331, - "learning_rate": 1.99325777801352e-05, - "loss": 0.6076, - "step": 204070 - }, - { - "epoch": 1.8041337364522003, - "grad_norm": 4.736661434173584, - "learning_rate": 1.9931104392463328e-05, - "loss": 0.7026, - "step": 204080 - }, - { - "epoch": 1.8042221397125124, - "grad_norm": 2.1682775020599365, - "learning_rate": 1.9929631004791457e-05, - "loss": 0.5879, - "step": 204090 - }, - { - "epoch": 1.8043105429728248, - "grad_norm": 1.8323850631713867, - "learning_rate": 1.9928157617119585e-05, - "loss": 0.5783, - "step": 204100 - }, - { - "epoch": 1.8043989462331371, - "grad_norm": 4.539486408233643, - "learning_rate": 1.9926684229447717e-05, - "loss": 0.7798, - "step": 204110 - }, - { - "epoch": 1.8044873494934492, - "grad_norm": 2.930159330368042, - "learning_rate": 1.9925210841775845e-05, - "loss": 0.513, - "step": 204120 - }, - { - "epoch": 1.8045757527537616, - "grad_norm": 2.8496460914611816, - "learning_rate": 1.9923737454103973e-05, - "loss": 0.6242, - "step": 204130 - }, - { - "epoch": 1.804664156014074, - "grad_norm": 6.112209796905518, - "learning_rate": 1.9922264066432102e-05, - "loss": 0.6255, - "step": 204140 - }, - { - "epoch": 1.804752559274386, - "grad_norm": 1.8608163595199585, - "learning_rate": 1.9920790678760233e-05, - "loss": 0.6518, - "step": 204150 - }, - { - "epoch": 1.8048409625346982, - "grad_norm": 1.0296214818954468, - "learning_rate": 1.9919317291088362e-05, - "loss": 0.6478, - "step": 204160 - }, - { - "epoch": 1.8049293657950105, - "grad_norm": 3.8804173469543457, - "learning_rate": 1.991784390341649e-05, - "loss": 0.5584, - "step": 204170 - }, - { - "epoch": 1.8050177690553229, - "grad_norm": 2.2194786071777344, - "learning_rate": 1.9916370515744622e-05, - "loss": 0.5766, - "step": 204180 - }, - { - "epoch": 1.805106172315635, - "grad_norm": 2.818420171737671, - "learning_rate": 1.991489712807275e-05, - "loss": 0.6371, - "step": 204190 - }, - { - "epoch": 1.805194575575947, - "grad_norm": 7.022854804992676, - "learning_rate": 1.991342374040088e-05, - "loss": 0.5697, - "step": 204200 - }, - { - "epoch": 1.8052829788362594, - "grad_norm": 1.2418639659881592, - "learning_rate": 1.9911950352729007e-05, - "loss": 0.6116, - "step": 204210 - }, - { - "epoch": 1.8053713820965718, - "grad_norm": 2.139376163482666, - "learning_rate": 1.991047696505714e-05, - "loss": 0.5006, - "step": 204220 - }, - { - "epoch": 1.805459785356884, - "grad_norm": 2.4272873401641846, - "learning_rate": 1.9909003577385267e-05, - "loss": 0.4331, - "step": 204230 - }, - { - "epoch": 1.8055481886171962, - "grad_norm": 1.1874834299087524, - "learning_rate": 1.9907530189713395e-05, - "loss": 0.4698, - "step": 204240 - }, - { - "epoch": 1.8056365918775086, - "grad_norm": 3.5929088592529297, - "learning_rate": 1.9906056802041527e-05, - "loss": 0.6432, - "step": 204250 - }, - { - "epoch": 1.8057249951378207, - "grad_norm": 1.9485775232315063, - "learning_rate": 1.9904583414369656e-05, - "loss": 0.581, - "step": 204260 - }, - { - "epoch": 1.8058133983981328, - "grad_norm": 2.9153568744659424, - "learning_rate": 1.9903110026697784e-05, - "loss": 0.6045, - "step": 204270 - }, - { - "epoch": 1.8059018016584452, - "grad_norm": 4.08786153793335, - "learning_rate": 1.9901636639025916e-05, - "loss": 0.5731, - "step": 204280 - }, - { - "epoch": 1.8059902049187575, - "grad_norm": 2.3672847747802734, - "learning_rate": 1.9900163251354044e-05, - "loss": 0.661, - "step": 204290 - }, - { - "epoch": 1.8060786081790696, - "grad_norm": 3.9267995357513428, - "learning_rate": 1.9898689863682172e-05, - "loss": 0.6448, - "step": 204300 - }, - { - "epoch": 1.8061670114393817, - "grad_norm": 4.543934345245361, - "learning_rate": 1.9897216476010304e-05, - "loss": 0.6241, - "step": 204310 - }, - { - "epoch": 1.806255414699694, - "grad_norm": 1.5214087963104248, - "learning_rate": 1.9895743088338432e-05, - "loss": 0.6662, - "step": 204320 - }, - { - "epoch": 1.8063438179600064, - "grad_norm": 2.0061838626861572, - "learning_rate": 1.989426970066656e-05, - "loss": 0.5999, - "step": 204330 - }, - { - "epoch": 1.8064322212203185, - "grad_norm": 1.2490204572677612, - "learning_rate": 1.9892796312994692e-05, - "loss": 0.6354, - "step": 204340 - }, - { - "epoch": 1.8065206244806309, - "grad_norm": 4.839119911193848, - "learning_rate": 1.989132292532282e-05, - "loss": 0.6157, - "step": 204350 - }, - { - "epoch": 1.8066090277409432, - "grad_norm": 2.272608995437622, - "learning_rate": 1.988984953765095e-05, - "loss": 0.5604, - "step": 204360 - }, - { - "epoch": 1.8066974310012553, - "grad_norm": 5.739010334014893, - "learning_rate": 1.988837614997908e-05, - "loss": 0.5527, - "step": 204370 - }, - { - "epoch": 1.8067858342615675, - "grad_norm": 1.696432113647461, - "learning_rate": 1.988690276230721e-05, - "loss": 0.6518, - "step": 204380 - }, - { - "epoch": 1.8068742375218798, - "grad_norm": 5.386918067932129, - "learning_rate": 1.9885429374635338e-05, - "loss": 0.6386, - "step": 204390 - }, - { - "epoch": 1.8069626407821922, - "grad_norm": 1.4314566850662231, - "learning_rate": 1.988395598696347e-05, - "loss": 0.6696, - "step": 204400 - }, - { - "epoch": 1.8070510440425043, - "grad_norm": 1.2361992597579956, - "learning_rate": 1.9882482599291598e-05, - "loss": 0.612, - "step": 204410 - }, - { - "epoch": 1.8071394473028164, - "grad_norm": 3.845834732055664, - "learning_rate": 1.9881009211619726e-05, - "loss": 0.5499, - "step": 204420 - }, - { - "epoch": 1.8072278505631287, - "grad_norm": 2.6744496822357178, - "learning_rate": 1.9879535823947854e-05, - "loss": 0.6015, - "step": 204430 - }, - { - "epoch": 1.807316253823441, - "grad_norm": 1.3522708415985107, - "learning_rate": 1.9878062436275986e-05, - "loss": 0.5649, - "step": 204440 - }, - { - "epoch": 1.8074046570837532, - "grad_norm": 0.5291694402694702, - "learning_rate": 1.9876589048604115e-05, - "loss": 0.6499, - "step": 204450 - }, - { - "epoch": 1.8074930603440655, - "grad_norm": 2.6592183113098145, - "learning_rate": 1.9875115660932243e-05, - "loss": 0.5, - "step": 204460 - }, - { - "epoch": 1.8075814636043779, - "grad_norm": 1.8216780424118042, - "learning_rate": 1.9873642273260375e-05, - "loss": 0.7019, - "step": 204470 - }, - { - "epoch": 1.80766986686469, - "grad_norm": 8.142723083496094, - "learning_rate": 1.9872168885588503e-05, - "loss": 0.5172, - "step": 204480 - }, - { - "epoch": 1.8077582701250021, - "grad_norm": 6.1031341552734375, - "learning_rate": 1.987069549791663e-05, - "loss": 0.4787, - "step": 204490 - }, - { - "epoch": 1.8078466733853145, - "grad_norm": 1.4311480522155762, - "learning_rate": 1.986922211024476e-05, - "loss": 0.6433, - "step": 204500 - }, - { - "epoch": 1.8079350766456268, - "grad_norm": 2.7168381214141846, - "learning_rate": 1.986774872257289e-05, - "loss": 0.5925, - "step": 204510 - }, - { - "epoch": 1.808023479905939, - "grad_norm": 2.2922656536102295, - "learning_rate": 1.986627533490102e-05, - "loss": 0.6095, - "step": 204520 - }, - { - "epoch": 1.808111883166251, - "grad_norm": 1.2676931619644165, - "learning_rate": 1.9864801947229148e-05, - "loss": 0.6203, - "step": 204530 - }, - { - "epoch": 1.8082002864265634, - "grad_norm": 1.2178192138671875, - "learning_rate": 1.986332855955728e-05, - "loss": 0.4626, - "step": 204540 - }, - { - "epoch": 1.8082886896868757, - "grad_norm": 6.804373741149902, - "learning_rate": 1.9861855171885408e-05, - "loss": 0.5906, - "step": 204550 - }, - { - "epoch": 1.8083770929471878, - "grad_norm": 1.9722411632537842, - "learning_rate": 1.9860381784213537e-05, - "loss": 0.5679, - "step": 204560 - }, - { - "epoch": 1.8084654962075, - "grad_norm": 1.387751579284668, - "learning_rate": 1.9858908396541665e-05, - "loss": 0.5495, - "step": 204570 - }, - { - "epoch": 1.8085538994678125, - "grad_norm": 2.8149945735931396, - "learning_rate": 1.9857435008869797e-05, - "loss": 0.598, - "step": 204580 - }, - { - "epoch": 1.8086423027281247, - "grad_norm": 0.8277774453163147, - "learning_rate": 1.9855961621197925e-05, - "loss": 0.627, - "step": 204590 - }, - { - "epoch": 1.8087307059884368, - "grad_norm": 4.5288543701171875, - "learning_rate": 1.9854488233526053e-05, - "loss": 0.6177, - "step": 204600 - }, - { - "epoch": 1.8088191092487491, - "grad_norm": 1.4247055053710938, - "learning_rate": 1.9853014845854182e-05, - "loss": 0.5941, - "step": 204610 - }, - { - "epoch": 1.8089075125090615, - "grad_norm": 2.537128448486328, - "learning_rate": 1.9851541458182314e-05, - "loss": 0.5751, - "step": 204620 - }, - { - "epoch": 1.8089959157693736, - "grad_norm": 3.9799728393554688, - "learning_rate": 1.9850068070510442e-05, - "loss": 0.5171, - "step": 204630 - }, - { - "epoch": 1.8090843190296857, - "grad_norm": 1.9004215002059937, - "learning_rate": 1.984859468283857e-05, - "loss": 0.694, - "step": 204640 - }, - { - "epoch": 1.809172722289998, - "grad_norm": 2.632812261581421, - "learning_rate": 1.9847121295166702e-05, - "loss": 0.5829, - "step": 204650 - }, - { - "epoch": 1.8092611255503104, - "grad_norm": 1.5173672437667847, - "learning_rate": 1.984564790749483e-05, - "loss": 0.5527, - "step": 204660 - }, - { - "epoch": 1.8093495288106225, - "grad_norm": 0.7835744619369507, - "learning_rate": 1.984417451982296e-05, - "loss": 0.5764, - "step": 204670 - }, - { - "epoch": 1.8094379320709346, - "grad_norm": 7.133487224578857, - "learning_rate": 1.9842701132151087e-05, - "loss": 0.6719, - "step": 204680 - }, - { - "epoch": 1.8095263353312472, - "grad_norm": 2.5694580078125, - "learning_rate": 1.984122774447922e-05, - "loss": 0.5588, - "step": 204690 - }, - { - "epoch": 1.8096147385915593, - "grad_norm": 2.642927646636963, - "learning_rate": 1.9839754356807347e-05, - "loss": 0.6477, - "step": 204700 - }, - { - "epoch": 1.8097031418518714, - "grad_norm": 3.1200151443481445, - "learning_rate": 1.9838280969135475e-05, - "loss": 0.5816, - "step": 204710 - }, - { - "epoch": 1.8097915451121838, - "grad_norm": 4.143441200256348, - "learning_rate": 1.9836807581463604e-05, - "loss": 0.5273, - "step": 204720 - }, - { - "epoch": 1.809879948372496, - "grad_norm": 4.186145782470703, - "learning_rate": 1.9835334193791736e-05, - "loss": 0.6081, - "step": 204730 - }, - { - "epoch": 1.8099683516328082, - "grad_norm": 32.160648345947266, - "learning_rate": 1.9833860806119864e-05, - "loss": 0.6195, - "step": 204740 - }, - { - "epoch": 1.8100567548931203, - "grad_norm": 1.5172966718673706, - "learning_rate": 1.9832387418447992e-05, - "loss": 0.6532, - "step": 204750 - }, - { - "epoch": 1.8101451581534327, - "grad_norm": 1.7198430299758911, - "learning_rate": 1.9830914030776124e-05, - "loss": 0.6428, - "step": 204760 - }, - { - "epoch": 1.810233561413745, - "grad_norm": 2.6470184326171875, - "learning_rate": 1.9829440643104252e-05, - "loss": 0.4808, - "step": 204770 - }, - { - "epoch": 1.8103219646740571, - "grad_norm": 5.744252681732178, - "learning_rate": 1.982796725543238e-05, - "loss": 0.6234, - "step": 204780 - }, - { - "epoch": 1.8104103679343693, - "grad_norm": 1.3214352130889893, - "learning_rate": 1.982649386776051e-05, - "loss": 0.4612, - "step": 204790 - }, - { - "epoch": 1.8104987711946816, - "grad_norm": 0.8004894852638245, - "learning_rate": 1.982502048008864e-05, - "loss": 0.47, - "step": 204800 - }, - { - "epoch": 1.810587174454994, - "grad_norm": 3.214850425720215, - "learning_rate": 1.982354709241677e-05, - "loss": 0.5348, - "step": 204810 - }, - { - "epoch": 1.810675577715306, - "grad_norm": 3.740530252456665, - "learning_rate": 1.9822073704744898e-05, - "loss": 0.628, - "step": 204820 - }, - { - "epoch": 1.8107639809756184, - "grad_norm": 1.620605230331421, - "learning_rate": 1.982060031707303e-05, - "loss": 0.4869, - "step": 204830 - }, - { - "epoch": 1.8108523842359308, - "grad_norm": 7.022885322570801, - "learning_rate": 1.9819126929401158e-05, - "loss": 0.5965, - "step": 204840 - }, - { - "epoch": 1.8109407874962429, - "grad_norm": 3.8251516819000244, - "learning_rate": 1.9817653541729286e-05, - "loss": 0.5974, - "step": 204850 - }, - { - "epoch": 1.811029190756555, - "grad_norm": 1.8026121854782104, - "learning_rate": 1.9816180154057414e-05, - "loss": 0.5771, - "step": 204860 - }, - { - "epoch": 1.8111175940168673, - "grad_norm": 2.1831490993499756, - "learning_rate": 1.9814706766385546e-05, - "loss": 0.6034, - "step": 204870 - }, - { - "epoch": 1.8112059972771797, - "grad_norm": 3.5884084701538086, - "learning_rate": 1.9813233378713674e-05, - "loss": 0.619, - "step": 204880 - }, - { - "epoch": 1.8112944005374918, - "grad_norm": 15.207366943359375, - "learning_rate": 1.9811759991041803e-05, - "loss": 0.5584, - "step": 204890 - }, - { - "epoch": 1.811382803797804, - "grad_norm": 2.334613084793091, - "learning_rate": 1.981028660336993e-05, - "loss": 0.593, - "step": 204900 - }, - { - "epoch": 1.8114712070581163, - "grad_norm": 0.8896350264549255, - "learning_rate": 1.9808813215698063e-05, - "loss": 0.6255, - "step": 204910 - }, - { - "epoch": 1.8115596103184286, - "grad_norm": 2.5466392040252686, - "learning_rate": 1.980733982802619e-05, - "loss": 0.6236, - "step": 204920 - }, - { - "epoch": 1.8116480135787407, - "grad_norm": 7.370975017547607, - "learning_rate": 1.980586644035432e-05, - "loss": 0.5788, - "step": 204930 - }, - { - "epoch": 1.811736416839053, - "grad_norm": 6.6383376121521, - "learning_rate": 1.980439305268245e-05, - "loss": 0.6388, - "step": 204940 - }, - { - "epoch": 1.8118248200993654, - "grad_norm": 2.832657814025879, - "learning_rate": 1.980291966501058e-05, - "loss": 0.655, - "step": 204950 - }, - { - "epoch": 1.8119132233596775, - "grad_norm": 1.4920225143432617, - "learning_rate": 1.9801446277338708e-05, - "loss": 0.5131, - "step": 204960 - }, - { - "epoch": 1.8120016266199896, - "grad_norm": 3.6986663341522217, - "learning_rate": 1.9799972889666836e-05, - "loss": 0.5755, - "step": 204970 - }, - { - "epoch": 1.812090029880302, - "grad_norm": 2.3363704681396484, - "learning_rate": 1.9798499501994968e-05, - "loss": 0.5117, - "step": 204980 - }, - { - "epoch": 1.8121784331406143, - "grad_norm": 0.8071462512016296, - "learning_rate": 1.9797026114323096e-05, - "loss": 0.5865, - "step": 204990 - }, - { - "epoch": 1.8122668364009265, - "grad_norm": 4.943005561828613, - "learning_rate": 1.9795552726651225e-05, - "loss": 0.6755, - "step": 205000 - }, - { - "epoch": 1.8123552396612386, - "grad_norm": 1.0413872003555298, - "learning_rate": 1.9794079338979357e-05, - "loss": 0.5493, - "step": 205010 - }, - { - "epoch": 1.812443642921551, - "grad_norm": 1.3888115882873535, - "learning_rate": 1.9792605951307485e-05, - "loss": 0.6143, - "step": 205020 - }, - { - "epoch": 1.8125320461818633, - "grad_norm": 3.122053861618042, - "learning_rate": 1.9791132563635613e-05, - "loss": 0.5599, - "step": 205030 - }, - { - "epoch": 1.8126204494421754, - "grad_norm": 1.4189289808273315, - "learning_rate": 1.978965917596374e-05, - "loss": 0.5805, - "step": 205040 - }, - { - "epoch": 1.8127088527024877, - "grad_norm": 5.1583638191223145, - "learning_rate": 1.9788185788291873e-05, - "loss": 0.4908, - "step": 205050 - }, - { - "epoch": 1.8127972559628, - "grad_norm": 2.4484779834747314, - "learning_rate": 1.9786712400620002e-05, - "loss": 0.5656, - "step": 205060 - }, - { - "epoch": 1.8128856592231122, - "grad_norm": 1.7282007932662964, - "learning_rate": 1.978523901294813e-05, - "loss": 0.6116, - "step": 205070 - }, - { - "epoch": 1.8129740624834243, - "grad_norm": 6.115036487579346, - "learning_rate": 1.978376562527626e-05, - "loss": 0.5016, - "step": 205080 - }, - { - "epoch": 1.8130624657437366, - "grad_norm": 3.375054121017456, - "learning_rate": 1.978229223760439e-05, - "loss": 0.5012, - "step": 205090 - }, - { - "epoch": 1.813150869004049, - "grad_norm": 2.5764636993408203, - "learning_rate": 1.978081884993252e-05, - "loss": 0.5606, - "step": 205100 - }, - { - "epoch": 1.813239272264361, - "grad_norm": 1.6634795665740967, - "learning_rate": 1.9779345462260647e-05, - "loss": 0.6111, - "step": 205110 - }, - { - "epoch": 1.8133276755246732, - "grad_norm": 7.311112403869629, - "learning_rate": 1.977787207458878e-05, - "loss": 0.5079, - "step": 205120 - }, - { - "epoch": 1.8134160787849856, - "grad_norm": 2.988346815109253, - "learning_rate": 1.9776398686916907e-05, - "loss": 0.5101, - "step": 205130 - }, - { - "epoch": 1.813504482045298, - "grad_norm": 2.2646923065185547, - "learning_rate": 1.9774925299245035e-05, - "loss": 0.6477, - "step": 205140 - }, - { - "epoch": 1.81359288530561, - "grad_norm": 5.640881538391113, - "learning_rate": 1.9773451911573164e-05, - "loss": 0.6112, - "step": 205150 - }, - { - "epoch": 1.8136812885659221, - "grad_norm": 4.360267162322998, - "learning_rate": 1.9771978523901295e-05, - "loss": 0.5578, - "step": 205160 - }, - { - "epoch": 1.8137696918262347, - "grad_norm": 2.666182041168213, - "learning_rate": 1.9770505136229424e-05, - "loss": 0.5626, - "step": 205170 - }, - { - "epoch": 1.8138580950865468, - "grad_norm": 1.572108268737793, - "learning_rate": 1.9769031748557552e-05, - "loss": 0.6473, - "step": 205180 - }, - { - "epoch": 1.813946498346859, - "grad_norm": 2.5407955646514893, - "learning_rate": 1.9767558360885684e-05, - "loss": 0.5364, - "step": 205190 - }, - { - "epoch": 1.8140349016071713, - "grad_norm": 5.172571182250977, - "learning_rate": 1.9766084973213812e-05, - "loss": 0.4815, - "step": 205200 - }, - { - "epoch": 1.8141233048674836, - "grad_norm": 4.145648956298828, - "learning_rate": 1.976461158554194e-05, - "loss": 0.6249, - "step": 205210 - }, - { - "epoch": 1.8142117081277958, - "grad_norm": 6.628396987915039, - "learning_rate": 1.9763138197870072e-05, - "loss": 0.4574, - "step": 205220 - }, - { - "epoch": 1.8143001113881079, - "grad_norm": 4.403090476989746, - "learning_rate": 1.97616648101982e-05, - "loss": 0.5821, - "step": 205230 - }, - { - "epoch": 1.8143885146484202, - "grad_norm": 3.767047882080078, - "learning_rate": 1.976019142252633e-05, - "loss": 0.5941, - "step": 205240 - }, - { - "epoch": 1.8144769179087326, - "grad_norm": 2.1737284660339355, - "learning_rate": 1.975871803485446e-05, - "loss": 0.5473, - "step": 205250 - }, - { - "epoch": 1.8145653211690447, - "grad_norm": 2.667391061782837, - "learning_rate": 1.975724464718259e-05, - "loss": 0.4895, - "step": 205260 - }, - { - "epoch": 1.8146537244293568, - "grad_norm": 16.818302154541016, - "learning_rate": 1.9755771259510718e-05, - "loss": 0.609, - "step": 205270 - }, - { - "epoch": 1.8147421276896694, - "grad_norm": 1.2136601209640503, - "learning_rate": 1.975429787183885e-05, - "loss": 0.5769, - "step": 205280 - }, - { - "epoch": 1.8148305309499815, - "grad_norm": 6.928625106811523, - "learning_rate": 1.9752824484166978e-05, - "loss": 0.6335, - "step": 205290 - }, - { - "epoch": 1.8149189342102936, - "grad_norm": 6.01639461517334, - "learning_rate": 1.9751351096495106e-05, - "loss": 0.4662, - "step": 205300 - }, - { - "epoch": 1.815007337470606, - "grad_norm": 9.724088668823242, - "learning_rate": 1.9749877708823238e-05, - "loss": 0.4604, - "step": 205310 - }, - { - "epoch": 1.8150957407309183, - "grad_norm": 3.2025539875030518, - "learning_rate": 1.9748404321151366e-05, - "loss": 0.8161, - "step": 205320 - }, - { - "epoch": 1.8151841439912304, - "grad_norm": 2.018144369125366, - "learning_rate": 1.9746930933479494e-05, - "loss": 0.5801, - "step": 205330 - }, - { - "epoch": 1.8152725472515425, - "grad_norm": 2.981337308883667, - "learning_rate": 1.9745457545807626e-05, - "loss": 0.5227, - "step": 205340 - }, - { - "epoch": 1.8153609505118549, - "grad_norm": 2.2382495403289795, - "learning_rate": 1.9743984158135755e-05, - "loss": 0.5619, - "step": 205350 - }, - { - "epoch": 1.8154493537721672, - "grad_norm": 1.552259087562561, - "learning_rate": 1.9742510770463883e-05, - "loss": 0.6834, - "step": 205360 - }, - { - "epoch": 1.8155377570324793, - "grad_norm": 1.2444350719451904, - "learning_rate": 1.974103738279201e-05, - "loss": 0.5761, - "step": 205370 - }, - { - "epoch": 1.8156261602927914, - "grad_norm": 3.165355920791626, - "learning_rate": 1.9739563995120143e-05, - "loss": 0.7512, - "step": 205380 - }, - { - "epoch": 1.8157145635531038, - "grad_norm": 2.6302196979522705, - "learning_rate": 1.973809060744827e-05, - "loss": 0.6307, - "step": 205390 - }, - { - "epoch": 1.8158029668134161, - "grad_norm": 2.225032091140747, - "learning_rate": 1.97366172197764e-05, - "loss": 0.5078, - "step": 205400 - }, - { - "epoch": 1.8158913700737283, - "grad_norm": 1.9014170169830322, - "learning_rate": 1.973514383210453e-05, - "loss": 0.5053, - "step": 205410 - }, - { - "epoch": 1.8159797733340406, - "grad_norm": 3.0980594158172607, - "learning_rate": 1.973367044443266e-05, - "loss": 0.56, - "step": 205420 - }, - { - "epoch": 1.816068176594353, - "grad_norm": 1.409366250038147, - "learning_rate": 1.9732197056760788e-05, - "loss": 0.3925, - "step": 205430 - }, - { - "epoch": 1.816156579854665, - "grad_norm": 5.884101867675781, - "learning_rate": 1.9730723669088916e-05, - "loss": 0.5774, - "step": 205440 - }, - { - "epoch": 1.8162449831149772, - "grad_norm": 5.426363468170166, - "learning_rate": 1.9729250281417048e-05, - "loss": 0.5905, - "step": 205450 - }, - { - "epoch": 1.8163333863752895, - "grad_norm": 1.9328361749649048, - "learning_rate": 1.9727776893745177e-05, - "loss": 0.6275, - "step": 205460 - }, - { - "epoch": 1.8164217896356019, - "grad_norm": 3.0021536350250244, - "learning_rate": 1.9726303506073305e-05, - "loss": 0.5065, - "step": 205470 - }, - { - "epoch": 1.816510192895914, - "grad_norm": 15.42954158782959, - "learning_rate": 1.9724830118401437e-05, - "loss": 0.5191, - "step": 205480 - }, - { - "epoch": 1.816598596156226, - "grad_norm": 5.228804588317871, - "learning_rate": 1.9723356730729565e-05, - "loss": 0.6386, - "step": 205490 - }, - { - "epoch": 1.8166869994165384, - "grad_norm": 0.9492586851119995, - "learning_rate": 1.9721883343057693e-05, - "loss": 0.5185, - "step": 205500 - }, - { - "epoch": 1.8167754026768508, - "grad_norm": 3.6664984226226807, - "learning_rate": 1.9720409955385822e-05, - "loss": 0.6452, - "step": 205510 - }, - { - "epoch": 1.816863805937163, - "grad_norm": 1.380125880241394, - "learning_rate": 1.9718936567713953e-05, - "loss": 0.687, - "step": 205520 - }, - { - "epoch": 1.8169522091974752, - "grad_norm": 2.0596303939819336, - "learning_rate": 1.9717463180042082e-05, - "loss": 0.6561, - "step": 205530 - }, - { - "epoch": 1.8170406124577876, - "grad_norm": 1.2801146507263184, - "learning_rate": 1.971598979237021e-05, - "loss": 0.6626, - "step": 205540 - }, - { - "epoch": 1.8171290157180997, - "grad_norm": 2.1601812839508057, - "learning_rate": 1.971451640469834e-05, - "loss": 0.5393, - "step": 205550 - }, - { - "epoch": 1.8172174189784118, - "grad_norm": 4.125202178955078, - "learning_rate": 1.971304301702647e-05, - "loss": 0.5652, - "step": 205560 - }, - { - "epoch": 1.8173058222387242, - "grad_norm": 9.637201309204102, - "learning_rate": 1.97115696293546e-05, - "loss": 0.5294, - "step": 205570 - }, - { - "epoch": 1.8173942254990365, - "grad_norm": 2.6966235637664795, - "learning_rate": 1.9710096241682727e-05, - "loss": 0.5471, - "step": 205580 - }, - { - "epoch": 1.8174826287593486, - "grad_norm": 1.6010628938674927, - "learning_rate": 1.970862285401086e-05, - "loss": 0.5847, - "step": 205590 - }, - { - "epoch": 1.8175710320196607, - "grad_norm": 1.595720887184143, - "learning_rate": 1.9707149466338987e-05, - "loss": 0.5632, - "step": 205600 - }, - { - "epoch": 1.817659435279973, - "grad_norm": 6.063867568969727, - "learning_rate": 1.9705676078667115e-05, - "loss": 0.6014, - "step": 205610 - }, - { - "epoch": 1.8177478385402854, - "grad_norm": 1.8563518524169922, - "learning_rate": 1.9704202690995244e-05, - "loss": 0.5215, - "step": 205620 - }, - { - "epoch": 1.8178362418005976, - "grad_norm": 6.146674156188965, - "learning_rate": 1.9702729303323376e-05, - "loss": 0.4971, - "step": 205630 - }, - { - "epoch": 1.81792464506091, - "grad_norm": 7.920614242553711, - "learning_rate": 1.9701255915651504e-05, - "loss": 0.7799, - "step": 205640 - }, - { - "epoch": 1.8180130483212222, - "grad_norm": 1.5419124364852905, - "learning_rate": 1.9699782527979632e-05, - "loss": 0.5308, - "step": 205650 - }, - { - "epoch": 1.8181014515815344, - "grad_norm": 4.718521595001221, - "learning_rate": 1.9698309140307764e-05, - "loss": 0.5452, - "step": 205660 - }, - { - "epoch": 1.8181898548418465, - "grad_norm": 3.180023193359375, - "learning_rate": 1.9696835752635892e-05, - "loss": 0.7371, - "step": 205670 - }, - { - "epoch": 1.8182782581021588, - "grad_norm": 4.409896373748779, - "learning_rate": 1.969536236496402e-05, - "loss": 0.6145, - "step": 205680 - }, - { - "epoch": 1.8183666613624712, - "grad_norm": 4.773332118988037, - "learning_rate": 1.969388897729215e-05, - "loss": 0.5614, - "step": 205690 - }, - { - "epoch": 1.8184550646227833, - "grad_norm": 8.93399429321289, - "learning_rate": 1.969241558962028e-05, - "loss": 0.5681, - "step": 205700 - }, - { - "epoch": 1.8185434678830954, - "grad_norm": 5.823650360107422, - "learning_rate": 1.969094220194841e-05, - "loss": 0.665, - "step": 205710 - }, - { - "epoch": 1.8186318711434077, - "grad_norm": 2.985886335372925, - "learning_rate": 1.9689468814276537e-05, - "loss": 0.7392, - "step": 205720 - }, - { - "epoch": 1.81872027440372, - "grad_norm": 15.467670440673828, - "learning_rate": 1.9687995426604666e-05, - "loss": 0.6906, - "step": 205730 - }, - { - "epoch": 1.8188086776640322, - "grad_norm": 8.271402359008789, - "learning_rate": 1.9686522038932798e-05, - "loss": 0.6446, - "step": 205740 - }, - { - "epoch": 1.8188970809243445, - "grad_norm": 3.8750271797180176, - "learning_rate": 1.9685048651260926e-05, - "loss": 0.5974, - "step": 205750 - }, - { - "epoch": 1.8189854841846569, - "grad_norm": 2.3784050941467285, - "learning_rate": 1.9683575263589054e-05, - "loss": 0.4477, - "step": 205760 - }, - { - "epoch": 1.819073887444969, - "grad_norm": 1.3976037502288818, - "learning_rate": 1.9682101875917186e-05, - "loss": 0.6801, - "step": 205770 - }, - { - "epoch": 1.8191622907052811, - "grad_norm": 6.066532611846924, - "learning_rate": 1.9680628488245314e-05, - "loss": 0.6771, - "step": 205780 - }, - { - "epoch": 1.8192506939655935, - "grad_norm": 5.840319633483887, - "learning_rate": 1.9679155100573443e-05, - "loss": 0.5399, - "step": 205790 - }, - { - "epoch": 1.8193390972259058, - "grad_norm": 4.8559489250183105, - "learning_rate": 1.967768171290157e-05, - "loss": 0.4738, - "step": 205800 - }, - { - "epoch": 1.819427500486218, - "grad_norm": 8.714665412902832, - "learning_rate": 1.9676208325229703e-05, - "loss": 0.7982, - "step": 205810 - }, - { - "epoch": 1.81951590374653, - "grad_norm": 6.71140718460083, - "learning_rate": 1.967473493755783e-05, - "loss": 0.5827, - "step": 205820 - }, - { - "epoch": 1.8196043070068424, - "grad_norm": 2.8837430477142334, - "learning_rate": 1.967326154988596e-05, - "loss": 0.6267, - "step": 205830 - }, - { - "epoch": 1.8196927102671547, - "grad_norm": 6.497797012329102, - "learning_rate": 1.9671788162214088e-05, - "loss": 0.6017, - "step": 205840 - }, - { - "epoch": 1.8197811135274669, - "grad_norm": 2.312929153442383, - "learning_rate": 1.967031477454222e-05, - "loss": 0.5694, - "step": 205850 - }, - { - "epoch": 1.819869516787779, - "grad_norm": 4.205124855041504, - "learning_rate": 1.9668841386870348e-05, - "loss": 0.6344, - "step": 205860 - }, - { - "epoch": 1.8199579200480915, - "grad_norm": 2.381417751312256, - "learning_rate": 1.9667367999198476e-05, - "loss": 0.5678, - "step": 205870 - }, - { - "epoch": 1.8200463233084037, - "grad_norm": 2.740489959716797, - "learning_rate": 1.9665894611526608e-05, - "loss": 0.6498, - "step": 205880 - }, - { - "epoch": 1.8201347265687158, - "grad_norm": 8.038272857666016, - "learning_rate": 1.9664421223854736e-05, - "loss": 0.6189, - "step": 205890 - }, - { - "epoch": 1.8202231298290281, - "grad_norm": 0.5989644527435303, - "learning_rate": 1.9662947836182865e-05, - "loss": 0.5195, - "step": 205900 - }, - { - "epoch": 1.8203115330893405, - "grad_norm": 7.084933280944824, - "learning_rate": 1.9661474448510993e-05, - "loss": 0.571, - "step": 205910 - }, - { - "epoch": 1.8203999363496526, - "grad_norm": 11.091043472290039, - "learning_rate": 1.9660001060839125e-05, - "loss": 0.6413, - "step": 205920 - }, - { - "epoch": 1.8204883396099647, - "grad_norm": 1.5782756805419922, - "learning_rate": 1.9658527673167253e-05, - "loss": 0.5706, - "step": 205930 - }, - { - "epoch": 1.820576742870277, - "grad_norm": 4.537599563598633, - "learning_rate": 1.965705428549538e-05, - "loss": 0.5001, - "step": 205940 - }, - { - "epoch": 1.8206651461305894, - "grad_norm": 1.3987971544265747, - "learning_rate": 1.9655580897823513e-05, - "loss": 0.5765, - "step": 205950 - }, - { - "epoch": 1.8207535493909015, - "grad_norm": 3.5085034370422363, - "learning_rate": 1.9654107510151642e-05, - "loss": 0.4726, - "step": 205960 - }, - { - "epoch": 1.8208419526512136, - "grad_norm": 4.559741020202637, - "learning_rate": 1.965263412247977e-05, - "loss": 0.5985, - "step": 205970 - }, - { - "epoch": 1.820930355911526, - "grad_norm": 1.4537702798843384, - "learning_rate": 1.96511607348079e-05, - "loss": 0.4433, - "step": 205980 - }, - { - "epoch": 1.8210187591718383, - "grad_norm": 15.174161911010742, - "learning_rate": 1.964968734713603e-05, - "loss": 0.5866, - "step": 205990 - }, - { - "epoch": 1.8211071624321504, - "grad_norm": 5.180905818939209, - "learning_rate": 1.964821395946416e-05, - "loss": 0.5105, - "step": 206000 - }, - { - "epoch": 1.8211955656924628, - "grad_norm": 1.7084853649139404, - "learning_rate": 1.9646740571792287e-05, - "loss": 0.6123, - "step": 206010 - }, - { - "epoch": 1.8212839689527751, - "grad_norm": 2.7740683555603027, - "learning_rate": 1.9645267184120415e-05, - "loss": 0.6021, - "step": 206020 - }, - { - "epoch": 1.8213723722130872, - "grad_norm": 4.2755913734436035, - "learning_rate": 1.9643793796448547e-05, - "loss": 0.5733, - "step": 206030 - }, - { - "epoch": 1.8214607754733994, - "grad_norm": 2.595569372177124, - "learning_rate": 1.9642320408776675e-05, - "loss": 0.5537, - "step": 206040 - }, - { - "epoch": 1.8215491787337117, - "grad_norm": 2.639146566390991, - "learning_rate": 1.9640847021104804e-05, - "loss": 0.7424, - "step": 206050 - }, - { - "epoch": 1.821637581994024, - "grad_norm": 1.8448108434677124, - "learning_rate": 1.9639373633432935e-05, - "loss": 0.5973, - "step": 206060 - }, - { - "epoch": 1.8217259852543362, - "grad_norm": 5.290491104125977, - "learning_rate": 1.9637900245761064e-05, - "loss": 0.685, - "step": 206070 - }, - { - "epoch": 1.8218143885146483, - "grad_norm": 3.2655539512634277, - "learning_rate": 1.9636426858089192e-05, - "loss": 0.5121, - "step": 206080 - }, - { - "epoch": 1.8219027917749606, - "grad_norm": 3.944739818572998, - "learning_rate": 1.963495347041732e-05, - "loss": 0.6442, - "step": 206090 - }, - { - "epoch": 1.821991195035273, - "grad_norm": 34.24992752075195, - "learning_rate": 1.9633480082745452e-05, - "loss": 0.6488, - "step": 206100 - }, - { - "epoch": 1.822079598295585, - "grad_norm": 1.8902596235275269, - "learning_rate": 1.963200669507358e-05, - "loss": 0.5305, - "step": 206110 - }, - { - "epoch": 1.8221680015558974, - "grad_norm": 2.227578639984131, - "learning_rate": 1.963053330740171e-05, - "loss": 0.5316, - "step": 206120 - }, - { - "epoch": 1.8222564048162098, - "grad_norm": 1.9944559335708618, - "learning_rate": 1.962905991972984e-05, - "loss": 0.5433, - "step": 206130 - }, - { - "epoch": 1.8223448080765219, - "grad_norm": 2.3997745513916016, - "learning_rate": 1.962758653205797e-05, - "loss": 0.7141, - "step": 206140 - }, - { - "epoch": 1.822433211336834, - "grad_norm": 1.6116048097610474, - "learning_rate": 1.9626113144386097e-05, - "loss": 0.631, - "step": 206150 - }, - { - "epoch": 1.8225216145971463, - "grad_norm": 1.3154655694961548, - "learning_rate": 1.962463975671423e-05, - "loss": 0.5164, - "step": 206160 - }, - { - "epoch": 1.8226100178574587, - "grad_norm": 5.549624919891357, - "learning_rate": 1.9623166369042357e-05, - "loss": 0.6743, - "step": 206170 - }, - { - "epoch": 1.8226984211177708, - "grad_norm": 2.2603037357330322, - "learning_rate": 1.9621692981370486e-05, - "loss": 0.5922, - "step": 206180 - }, - { - "epoch": 1.822786824378083, - "grad_norm": 3.52301025390625, - "learning_rate": 1.9620219593698618e-05, - "loss": 0.4822, - "step": 206190 - }, - { - "epoch": 1.8228752276383953, - "grad_norm": 5.003649711608887, - "learning_rate": 1.9618746206026746e-05, - "loss": 0.5354, - "step": 206200 - }, - { - "epoch": 1.8229636308987076, - "grad_norm": 2.923891305923462, - "learning_rate": 1.9617272818354878e-05, - "loss": 0.4701, - "step": 206210 - }, - { - "epoch": 1.8230520341590197, - "grad_norm": 0.8332355618476868, - "learning_rate": 1.9615799430683006e-05, - "loss": 0.5434, - "step": 206220 - }, - { - "epoch": 1.823140437419332, - "grad_norm": 1.7102841138839722, - "learning_rate": 1.9614326043011134e-05, - "loss": 0.5627, - "step": 206230 - }, - { - "epoch": 1.8232288406796444, - "grad_norm": 10.538195610046387, - "learning_rate": 1.9612852655339266e-05, - "loss": 0.6224, - "step": 206240 - }, - { - "epoch": 1.8233172439399565, - "grad_norm": 3.4315640926361084, - "learning_rate": 1.9611379267667394e-05, - "loss": 0.6502, - "step": 206250 - }, - { - "epoch": 1.8234056472002687, - "grad_norm": 1.2188180685043335, - "learning_rate": 1.9609905879995523e-05, - "loss": 0.6174, - "step": 206260 - }, - { - "epoch": 1.823494050460581, - "grad_norm": 6.603806018829346, - "learning_rate": 1.960843249232365e-05, - "loss": 0.4602, - "step": 206270 - }, - { - "epoch": 1.8235824537208933, - "grad_norm": 4.472281455993652, - "learning_rate": 1.9606959104651783e-05, - "loss": 0.537, - "step": 206280 - }, - { - "epoch": 1.8236708569812055, - "grad_norm": 1.5111479759216309, - "learning_rate": 1.960548571697991e-05, - "loss": 0.6231, - "step": 206290 - }, - { - "epoch": 1.8237592602415176, - "grad_norm": 1.1447303295135498, - "learning_rate": 1.960401232930804e-05, - "loss": 0.4651, - "step": 206300 - }, - { - "epoch": 1.82384766350183, - "grad_norm": 1.6166595220565796, - "learning_rate": 1.9602538941636168e-05, - "loss": 0.6349, - "step": 206310 - }, - { - "epoch": 1.8239360667621423, - "grad_norm": 3.1219429969787598, - "learning_rate": 1.96010655539643e-05, - "loss": 0.6483, - "step": 206320 - }, - { - "epoch": 1.8240244700224544, - "grad_norm": 1.9796806573867798, - "learning_rate": 1.9599592166292428e-05, - "loss": 0.6497, - "step": 206330 - }, - { - "epoch": 1.8241128732827667, - "grad_norm": 2.0331077575683594, - "learning_rate": 1.9598118778620556e-05, - "loss": 0.6726, - "step": 206340 - }, - { - "epoch": 1.824201276543079, - "grad_norm": 3.3719019889831543, - "learning_rate": 1.9596645390948688e-05, - "loss": 0.5446, - "step": 206350 - }, - { - "epoch": 1.8242896798033912, - "grad_norm": 1.7892955541610718, - "learning_rate": 1.9595172003276817e-05, - "loss": 0.5504, - "step": 206360 - }, - { - "epoch": 1.8243780830637033, - "grad_norm": 4.419246673583984, - "learning_rate": 1.9593698615604945e-05, - "loss": 0.515, - "step": 206370 - }, - { - "epoch": 1.8244664863240156, - "grad_norm": 2.4962549209594727, - "learning_rate": 1.9592225227933073e-05, - "loss": 0.5677, - "step": 206380 - }, - { - "epoch": 1.824554889584328, - "grad_norm": 15.308316230773926, - "learning_rate": 1.9590751840261205e-05, - "loss": 0.5564, - "step": 206390 - }, - { - "epoch": 1.82464329284464, - "grad_norm": 3.7508552074432373, - "learning_rate": 1.9589278452589333e-05, - "loss": 0.5001, - "step": 206400 - }, - { - "epoch": 1.8247316961049522, - "grad_norm": 0.7837414741516113, - "learning_rate": 1.958780506491746e-05, - "loss": 0.5693, - "step": 206410 - }, - { - "epoch": 1.8248200993652646, - "grad_norm": 9.835325241088867, - "learning_rate": 1.9586331677245593e-05, - "loss": 0.5738, - "step": 206420 - }, - { - "epoch": 1.824908502625577, - "grad_norm": 4.901032447814941, - "learning_rate": 1.9584858289573722e-05, - "loss": 0.5124, - "step": 206430 - }, - { - "epoch": 1.824996905885889, - "grad_norm": 3.4703445434570312, - "learning_rate": 1.958338490190185e-05, - "loss": 0.5813, - "step": 206440 - }, - { - "epoch": 1.8250853091462012, - "grad_norm": 3.4250540733337402, - "learning_rate": 1.958191151422998e-05, - "loss": 0.5784, - "step": 206450 - }, - { - "epoch": 1.8251737124065137, - "grad_norm": 1.2305299043655396, - "learning_rate": 1.958043812655811e-05, - "loss": 0.4881, - "step": 206460 - }, - { - "epoch": 1.8252621156668258, - "grad_norm": 2.2637906074523926, - "learning_rate": 1.957896473888624e-05, - "loss": 0.5897, - "step": 206470 - }, - { - "epoch": 1.825350518927138, - "grad_norm": 3.5360212326049805, - "learning_rate": 1.9577491351214367e-05, - "loss": 0.6305, - "step": 206480 - }, - { - "epoch": 1.8254389221874503, - "grad_norm": 9.073210716247559, - "learning_rate": 1.9576017963542495e-05, - "loss": 0.6322, - "step": 206490 - }, - { - "epoch": 1.8255273254477626, - "grad_norm": 1.8201502561569214, - "learning_rate": 1.9574544575870627e-05, - "loss": 0.4767, - "step": 206500 - }, - { - "epoch": 1.8256157287080748, - "grad_norm": 8.53696060180664, - "learning_rate": 1.9573071188198755e-05, - "loss": 0.663, - "step": 206510 - }, - { - "epoch": 1.8257041319683869, - "grad_norm": 1.5252351760864258, - "learning_rate": 1.9571597800526884e-05, - "loss": 0.5231, - "step": 206520 - }, - { - "epoch": 1.8257925352286992, - "grad_norm": 5.737728595733643, - "learning_rate": 1.9570124412855015e-05, - "loss": 0.5788, - "step": 206530 - }, - { - "epoch": 1.8258809384890116, - "grad_norm": 1.994388222694397, - "learning_rate": 1.9568651025183144e-05, - "loss": 0.6812, - "step": 206540 - }, - { - "epoch": 1.8259693417493237, - "grad_norm": 3.8378353118896484, - "learning_rate": 1.9567177637511272e-05, - "loss": 0.6631, - "step": 206550 - }, - { - "epoch": 1.8260577450096358, - "grad_norm": 1.4128046035766602, - "learning_rate": 1.95657042498394e-05, - "loss": 0.5389, - "step": 206560 - }, - { - "epoch": 1.8261461482699481, - "grad_norm": 6.838903903961182, - "learning_rate": 1.9564230862167532e-05, - "loss": 0.6888, - "step": 206570 - }, - { - "epoch": 1.8262345515302605, - "grad_norm": 7.118066787719727, - "learning_rate": 1.956275747449566e-05, - "loss": 0.5024, - "step": 206580 - }, - { - "epoch": 1.8263229547905726, - "grad_norm": 43.27200698852539, - "learning_rate": 1.956128408682379e-05, - "loss": 0.6193, - "step": 206590 - }, - { - "epoch": 1.826411358050885, - "grad_norm": 2.128223180770874, - "learning_rate": 1.955981069915192e-05, - "loss": 0.4912, - "step": 206600 - }, - { - "epoch": 1.8264997613111973, - "grad_norm": 4.299762725830078, - "learning_rate": 1.955833731148005e-05, - "loss": 0.4791, - "step": 206610 - }, - { - "epoch": 1.8265881645715094, - "grad_norm": 1.7000281810760498, - "learning_rate": 1.9556863923808177e-05, - "loss": 0.472, - "step": 206620 - }, - { - "epoch": 1.8266765678318215, - "grad_norm": 4.285869121551514, - "learning_rate": 1.9555390536136306e-05, - "loss": 0.7635, - "step": 206630 - }, - { - "epoch": 1.8267649710921339, - "grad_norm": 2.826352119445801, - "learning_rate": 1.9553917148464438e-05, - "loss": 0.5418, - "step": 206640 - }, - { - "epoch": 1.8268533743524462, - "grad_norm": 1.4064503908157349, - "learning_rate": 1.9552443760792566e-05, - "loss": 0.5626, - "step": 206650 - }, - { - "epoch": 1.8269417776127583, - "grad_norm": 1.5571008920669556, - "learning_rate": 1.9550970373120694e-05, - "loss": 0.5358, - "step": 206660 - }, - { - "epoch": 1.8270301808730705, - "grad_norm": 4.954158306121826, - "learning_rate": 1.9549496985448823e-05, - "loss": 0.5906, - "step": 206670 - }, - { - "epoch": 1.8271185841333828, - "grad_norm": 2.0399539470672607, - "learning_rate": 1.9548023597776954e-05, - "loss": 0.5853, - "step": 206680 - }, - { - "epoch": 1.8272069873936951, - "grad_norm": 0.9200155138969421, - "learning_rate": 1.9546550210105083e-05, - "loss": 0.6993, - "step": 206690 - }, - { - "epoch": 1.8272953906540073, - "grad_norm": 3.1453781127929688, - "learning_rate": 1.954507682243321e-05, - "loss": 0.6423, - "step": 206700 - }, - { - "epoch": 1.8273837939143196, - "grad_norm": 2.170257329940796, - "learning_rate": 1.9543603434761343e-05, - "loss": 0.6632, - "step": 206710 - }, - { - "epoch": 1.827472197174632, - "grad_norm": 2.5499367713928223, - "learning_rate": 1.954213004708947e-05, - "loss": 0.5534, - "step": 206720 - }, - { - "epoch": 1.827560600434944, - "grad_norm": 11.52445125579834, - "learning_rate": 1.95406566594176e-05, - "loss": 0.6607, - "step": 206730 - }, - { - "epoch": 1.8276490036952562, - "grad_norm": 1.7752468585968018, - "learning_rate": 1.9539183271745728e-05, - "loss": 0.5646, - "step": 206740 - }, - { - "epoch": 1.8277374069555685, - "grad_norm": 1.7741189002990723, - "learning_rate": 1.953770988407386e-05, - "loss": 0.527, - "step": 206750 - }, - { - "epoch": 1.8278258102158809, - "grad_norm": 3.750159502029419, - "learning_rate": 1.9536236496401988e-05, - "loss": 0.738, - "step": 206760 - }, - { - "epoch": 1.827914213476193, - "grad_norm": 3.8642637729644775, - "learning_rate": 1.9534763108730116e-05, - "loss": 0.6589, - "step": 206770 - }, - { - "epoch": 1.828002616736505, - "grad_norm": 4.268569469451904, - "learning_rate": 1.9533289721058248e-05, - "loss": 0.7822, - "step": 206780 - }, - { - "epoch": 1.8280910199968174, - "grad_norm": 6.819716453552246, - "learning_rate": 1.9531816333386376e-05, - "loss": 0.4744, - "step": 206790 - }, - { - "epoch": 1.8281794232571298, - "grad_norm": 1.9105000495910645, - "learning_rate": 1.9530342945714505e-05, - "loss": 0.5862, - "step": 206800 - }, - { - "epoch": 1.828267826517442, - "grad_norm": 2.9903974533081055, - "learning_rate": 1.9528869558042633e-05, - "loss": 0.6234, - "step": 206810 - }, - { - "epoch": 1.8283562297777542, - "grad_norm": 1.6201250553131104, - "learning_rate": 1.9527396170370765e-05, - "loss": 0.4748, - "step": 206820 - }, - { - "epoch": 1.8284446330380666, - "grad_norm": 1.895225167274475, - "learning_rate": 1.9525922782698893e-05, - "loss": 0.6045, - "step": 206830 - }, - { - "epoch": 1.8285330362983787, - "grad_norm": 1.1606491804122925, - "learning_rate": 1.952444939502702e-05, - "loss": 0.633, - "step": 206840 - }, - { - "epoch": 1.8286214395586908, - "grad_norm": 4.069981575012207, - "learning_rate": 1.952297600735515e-05, - "loss": 0.464, - "step": 206850 - }, - { - "epoch": 1.8287098428190032, - "grad_norm": 2.674436330795288, - "learning_rate": 1.952150261968328e-05, - "loss": 0.6591, - "step": 206860 - }, - { - "epoch": 1.8287982460793155, - "grad_norm": 0.8125573396682739, - "learning_rate": 1.952002923201141e-05, - "loss": 0.5678, - "step": 206870 - }, - { - "epoch": 1.8288866493396276, - "grad_norm": 17.65329933166504, - "learning_rate": 1.951855584433954e-05, - "loss": 0.6219, - "step": 206880 - }, - { - "epoch": 1.8289750525999398, - "grad_norm": 1.762826919555664, - "learning_rate": 1.951708245666767e-05, - "loss": 0.6162, - "step": 206890 - }, - { - "epoch": 1.829063455860252, - "grad_norm": 2.4930219650268555, - "learning_rate": 1.95156090689958e-05, - "loss": 0.554, - "step": 206900 - }, - { - "epoch": 1.8291518591205644, - "grad_norm": 1.571549892425537, - "learning_rate": 1.9514135681323927e-05, - "loss": 0.554, - "step": 206910 - }, - { - "epoch": 1.8292402623808766, - "grad_norm": 2.7987961769104004, - "learning_rate": 1.9512662293652055e-05, - "loss": 0.531, - "step": 206920 - }, - { - "epoch": 1.829328665641189, - "grad_norm": 10.366473197937012, - "learning_rate": 1.9511188905980187e-05, - "loss": 0.5679, - "step": 206930 - }, - { - "epoch": 1.8294170689015012, - "grad_norm": 2.8565518856048584, - "learning_rate": 1.9509715518308315e-05, - "loss": 0.7168, - "step": 206940 - }, - { - "epoch": 1.8295054721618134, - "grad_norm": 1.980823278427124, - "learning_rate": 1.9508242130636444e-05, - "loss": 0.5628, - "step": 206950 - }, - { - "epoch": 1.8295938754221255, - "grad_norm": 3.3893544673919678, - "learning_rate": 1.9506768742964572e-05, - "loss": 0.6066, - "step": 206960 - }, - { - "epoch": 1.8296822786824378, - "grad_norm": 0.9177488684654236, - "learning_rate": 1.9505295355292704e-05, - "loss": 0.6143, - "step": 206970 - }, - { - "epoch": 1.8297706819427502, - "grad_norm": 2.841909408569336, - "learning_rate": 1.9503821967620832e-05, - "loss": 0.5786, - "step": 206980 - }, - { - "epoch": 1.8298590852030623, - "grad_norm": 11.035330772399902, - "learning_rate": 1.950234857994896e-05, - "loss": 0.5647, - "step": 206990 - }, - { - "epoch": 1.8299474884633744, - "grad_norm": 6.5169148445129395, - "learning_rate": 1.9500875192277092e-05, - "loss": 0.5018, - "step": 207000 - }, - { - "epoch": 1.8300358917236867, - "grad_norm": 7.916037082672119, - "learning_rate": 1.949940180460522e-05, - "loss": 0.574, - "step": 207010 - }, - { - "epoch": 1.830124294983999, - "grad_norm": 1.599095106124878, - "learning_rate": 1.949792841693335e-05, - "loss": 0.5706, - "step": 207020 - }, - { - "epoch": 1.8302126982443112, - "grad_norm": 2.2294921875, - "learning_rate": 1.949645502926148e-05, - "loss": 0.5595, - "step": 207030 - }, - { - "epoch": 1.8303011015046233, - "grad_norm": 2.055271863937378, - "learning_rate": 1.949498164158961e-05, - "loss": 0.6021, - "step": 207040 - }, - { - "epoch": 1.830389504764936, - "grad_norm": 0.9322070479393005, - "learning_rate": 1.9493508253917737e-05, - "loss": 0.6375, - "step": 207050 - }, - { - "epoch": 1.830477908025248, - "grad_norm": 9.942620277404785, - "learning_rate": 1.949203486624587e-05, - "loss": 0.6126, - "step": 207060 - }, - { - "epoch": 1.8305663112855601, - "grad_norm": 6.288659572601318, - "learning_rate": 1.9490561478573997e-05, - "loss": 0.4625, - "step": 207070 - }, - { - "epoch": 1.8306547145458725, - "grad_norm": 2.519831657409668, - "learning_rate": 1.9489088090902126e-05, - "loss": 0.6937, - "step": 207080 - }, - { - "epoch": 1.8307431178061848, - "grad_norm": 3.0569796562194824, - "learning_rate": 1.9487614703230258e-05, - "loss": 0.5536, - "step": 207090 - }, - { - "epoch": 1.830831521066497, - "grad_norm": 4.443091869354248, - "learning_rate": 1.9486141315558386e-05, - "loss": 0.5247, - "step": 207100 - }, - { - "epoch": 1.830919924326809, - "grad_norm": 1.5366700887680054, - "learning_rate": 1.9484667927886514e-05, - "loss": 0.5371, - "step": 207110 - }, - { - "epoch": 1.8310083275871214, - "grad_norm": 2.45685076713562, - "learning_rate": 1.9483194540214646e-05, - "loss": 0.5259, - "step": 207120 - }, - { - "epoch": 1.8310967308474337, - "grad_norm": 1.5443123579025269, - "learning_rate": 1.9481721152542774e-05, - "loss": 0.6548, - "step": 207130 - }, - { - "epoch": 1.8311851341077459, - "grad_norm": 5.310937404632568, - "learning_rate": 1.9480247764870903e-05, - "loss": 0.4454, - "step": 207140 - }, - { - "epoch": 1.831273537368058, - "grad_norm": 1.7682256698608398, - "learning_rate": 1.9478774377199034e-05, - "loss": 0.5112, - "step": 207150 - }, - { - "epoch": 1.8313619406283703, - "grad_norm": 2.3581113815307617, - "learning_rate": 1.9477300989527163e-05, - "loss": 0.4136, - "step": 207160 - }, - { - "epoch": 1.8314503438886827, - "grad_norm": 4.200525283813477, - "learning_rate": 1.947582760185529e-05, - "loss": 0.5595, - "step": 207170 - }, - { - "epoch": 1.8315387471489948, - "grad_norm": 2.5899343490600586, - "learning_rate": 1.9474354214183423e-05, - "loss": 0.5646, - "step": 207180 - }, - { - "epoch": 1.8316271504093071, - "grad_norm": 3.1714022159576416, - "learning_rate": 1.947288082651155e-05, - "loss": 0.5637, - "step": 207190 - }, - { - "epoch": 1.8317155536696195, - "grad_norm": 2.9272472858428955, - "learning_rate": 1.947140743883968e-05, - "loss": 0.6306, - "step": 207200 - }, - { - "epoch": 1.8318039569299316, - "grad_norm": 3.404977798461914, - "learning_rate": 1.9469934051167808e-05, - "loss": 0.6518, - "step": 207210 - }, - { - "epoch": 1.8318923601902437, - "grad_norm": 5.2195210456848145, - "learning_rate": 1.946846066349594e-05, - "loss": 0.5652, - "step": 207220 - }, - { - "epoch": 1.831980763450556, - "grad_norm": 6.188805103302002, - "learning_rate": 1.9466987275824068e-05, - "loss": 0.5442, - "step": 207230 - }, - { - "epoch": 1.8320691667108684, - "grad_norm": 6.709285736083984, - "learning_rate": 1.9465513888152196e-05, - "loss": 0.716, - "step": 207240 - }, - { - "epoch": 1.8321575699711805, - "grad_norm": 1.6161854267120361, - "learning_rate": 1.9464040500480328e-05, - "loss": 0.5632, - "step": 207250 - }, - { - "epoch": 1.8322459732314926, - "grad_norm": 2.73622989654541, - "learning_rate": 1.9462567112808456e-05, - "loss": 0.617, - "step": 207260 - }, - { - "epoch": 1.832334376491805, - "grad_norm": 8.8794584274292, - "learning_rate": 1.9461093725136585e-05, - "loss": 0.6688, - "step": 207270 - }, - { - "epoch": 1.8324227797521173, - "grad_norm": 2.570241928100586, - "learning_rate": 1.9459620337464713e-05, - "loss": 0.4684, - "step": 207280 - }, - { - "epoch": 1.8325111830124294, - "grad_norm": 9.02534008026123, - "learning_rate": 1.9458146949792845e-05, - "loss": 0.7221, - "step": 207290 - }, - { - "epoch": 1.8325995862727418, - "grad_norm": 5.554074287414551, - "learning_rate": 1.9456673562120973e-05, - "loss": 0.6299, - "step": 207300 - }, - { - "epoch": 1.8326879895330541, - "grad_norm": 1.8187109231948853, - "learning_rate": 1.94552001744491e-05, - "loss": 0.5546, - "step": 207310 - }, - { - "epoch": 1.8327763927933662, - "grad_norm": 3.4909963607788086, - "learning_rate": 1.945372678677723e-05, - "loss": 0.4557, - "step": 207320 - }, - { - "epoch": 1.8328647960536784, - "grad_norm": 3.637523889541626, - "learning_rate": 1.9452253399105362e-05, - "loss": 0.5962, - "step": 207330 - }, - { - "epoch": 1.8329531993139907, - "grad_norm": 1.782825231552124, - "learning_rate": 1.945078001143349e-05, - "loss": 0.575, - "step": 207340 - }, - { - "epoch": 1.833041602574303, - "grad_norm": 1.9207335710525513, - "learning_rate": 1.944930662376162e-05, - "loss": 0.4993, - "step": 207350 - }, - { - "epoch": 1.8331300058346152, - "grad_norm": 1.6457518339157104, - "learning_rate": 1.944783323608975e-05, - "loss": 0.5708, - "step": 207360 - }, - { - "epoch": 1.8332184090949273, - "grad_norm": 0.7958346009254456, - "learning_rate": 1.944635984841788e-05, - "loss": 0.61, - "step": 207370 - }, - { - "epoch": 1.8333068123552396, - "grad_norm": 4.749642372131348, - "learning_rate": 1.9444886460746007e-05, - "loss": 0.6038, - "step": 207380 - }, - { - "epoch": 1.833395215615552, - "grad_norm": 13.015735626220703, - "learning_rate": 1.9443413073074135e-05, - "loss": 0.592, - "step": 207390 - }, - { - "epoch": 1.833483618875864, - "grad_norm": 2.589996337890625, - "learning_rate": 1.9441939685402267e-05, - "loss": 0.6938, - "step": 207400 - }, - { - "epoch": 1.8335720221361764, - "grad_norm": 1.5949350595474243, - "learning_rate": 1.9440466297730395e-05, - "loss": 0.6871, - "step": 207410 - }, - { - "epoch": 1.8336604253964888, - "grad_norm": 5.7265424728393555, - "learning_rate": 1.9438992910058524e-05, - "loss": 0.6874, - "step": 207420 - }, - { - "epoch": 1.8337488286568009, - "grad_norm": 3.134699821472168, - "learning_rate": 1.9437519522386652e-05, - "loss": 0.6307, - "step": 207430 - }, - { - "epoch": 1.833837231917113, - "grad_norm": 1.5864468812942505, - "learning_rate": 1.9436046134714784e-05, - "loss": 0.5999, - "step": 207440 - }, - { - "epoch": 1.8339256351774254, - "grad_norm": 2.123861789703369, - "learning_rate": 1.9434572747042912e-05, - "loss": 0.4582, - "step": 207450 - }, - { - "epoch": 1.8340140384377377, - "grad_norm": 3.683892250061035, - "learning_rate": 1.943309935937104e-05, - "loss": 0.6082, - "step": 207460 - }, - { - "epoch": 1.8341024416980498, - "grad_norm": 7.93398380279541, - "learning_rate": 1.9431625971699172e-05, - "loss": 0.6664, - "step": 207470 - }, - { - "epoch": 1.834190844958362, - "grad_norm": 0.6710127592086792, - "learning_rate": 1.94301525840273e-05, - "loss": 0.6653, - "step": 207480 - }, - { - "epoch": 1.8342792482186743, - "grad_norm": 3.0037691593170166, - "learning_rate": 1.942867919635543e-05, - "loss": 0.5595, - "step": 207490 - }, - { - "epoch": 1.8343676514789866, - "grad_norm": 1.559056282043457, - "learning_rate": 1.9427205808683557e-05, - "loss": 0.4848, - "step": 207500 - }, - { - "epoch": 1.8344560547392987, - "grad_norm": 2.6961400508880615, - "learning_rate": 1.942573242101169e-05, - "loss": 0.6084, - "step": 207510 - }, - { - "epoch": 1.834544457999611, - "grad_norm": 5.092144012451172, - "learning_rate": 1.9424259033339817e-05, - "loss": 0.6226, - "step": 207520 - }, - { - "epoch": 1.8346328612599234, - "grad_norm": 1.878910779953003, - "learning_rate": 1.9422785645667946e-05, - "loss": 0.5703, - "step": 207530 - }, - { - "epoch": 1.8347212645202355, - "grad_norm": 3.9596362113952637, - "learning_rate": 1.9421312257996077e-05, - "loss": 0.733, - "step": 207540 - }, - { - "epoch": 1.8348096677805477, - "grad_norm": 8.592870712280273, - "learning_rate": 1.9419838870324206e-05, - "loss": 0.5475, - "step": 207550 - }, - { - "epoch": 1.83489807104086, - "grad_norm": 2.708343505859375, - "learning_rate": 1.9418365482652334e-05, - "loss": 0.5893, - "step": 207560 - }, - { - "epoch": 1.8349864743011723, - "grad_norm": 2.026700496673584, - "learning_rate": 1.9416892094980463e-05, - "loss": 0.5527, - "step": 207570 - }, - { - "epoch": 1.8350748775614845, - "grad_norm": 1.2991694211959839, - "learning_rate": 1.9415418707308594e-05, - "loss": 0.6627, - "step": 207580 - }, - { - "epoch": 1.8351632808217966, - "grad_norm": 1.454665184020996, - "learning_rate": 1.9413945319636723e-05, - "loss": 0.5932, - "step": 207590 - }, - { - "epoch": 1.835251684082109, - "grad_norm": 1.2965185642242432, - "learning_rate": 1.941247193196485e-05, - "loss": 0.5742, - "step": 207600 - }, - { - "epoch": 1.8353400873424213, - "grad_norm": 1.6546359062194824, - "learning_rate": 1.941099854429298e-05, - "loss": 0.5657, - "step": 207610 - }, - { - "epoch": 1.8354284906027334, - "grad_norm": 1.7437026500701904, - "learning_rate": 1.940952515662111e-05, - "loss": 0.564, - "step": 207620 - }, - { - "epoch": 1.8355168938630455, - "grad_norm": 2.647946834564209, - "learning_rate": 1.940805176894924e-05, - "loss": 0.6428, - "step": 207630 - }, - { - "epoch": 1.835605297123358, - "grad_norm": 4.591547012329102, - "learning_rate": 1.9406578381277368e-05, - "loss": 0.5847, - "step": 207640 - }, - { - "epoch": 1.8356937003836702, - "grad_norm": 3.490241050720215, - "learning_rate": 1.94051049936055e-05, - "loss": 0.5666, - "step": 207650 - }, - { - "epoch": 1.8357821036439823, - "grad_norm": 2.4817020893096924, - "learning_rate": 1.9403631605933628e-05, - "loss": 0.4616, - "step": 207660 - }, - { - "epoch": 1.8358705069042947, - "grad_norm": 1.6254853010177612, - "learning_rate": 1.9402158218261756e-05, - "loss": 0.5828, - "step": 207670 - }, - { - "epoch": 1.835958910164607, - "grad_norm": 2.2969744205474854, - "learning_rate": 1.9400684830589885e-05, - "loss": 0.6024, - "step": 207680 - }, - { - "epoch": 1.8360473134249191, - "grad_norm": 3.1695868968963623, - "learning_rate": 1.9399211442918016e-05, - "loss": 0.6249, - "step": 207690 - }, - { - "epoch": 1.8361357166852312, - "grad_norm": 1.6998211145401, - "learning_rate": 1.9397738055246145e-05, - "loss": 0.4365, - "step": 207700 - }, - { - "epoch": 1.8362241199455436, - "grad_norm": 2.9159839153289795, - "learning_rate": 1.9396264667574273e-05, - "loss": 0.6375, - "step": 207710 - }, - { - "epoch": 1.836312523205856, - "grad_norm": 1.8462963104248047, - "learning_rate": 1.9394791279902405e-05, - "loss": 0.5091, - "step": 207720 - }, - { - "epoch": 1.836400926466168, - "grad_norm": 1.6872423887252808, - "learning_rate": 1.9393317892230533e-05, - "loss": 0.5515, - "step": 207730 - }, - { - "epoch": 1.8364893297264802, - "grad_norm": 2.599106550216675, - "learning_rate": 1.939184450455866e-05, - "loss": 0.7511, - "step": 207740 - }, - { - "epoch": 1.8365777329867925, - "grad_norm": 3.112389087677002, - "learning_rate": 1.939037111688679e-05, - "loss": 0.6244, - "step": 207750 - }, - { - "epoch": 1.8366661362471048, - "grad_norm": 1.9276559352874756, - "learning_rate": 1.938889772921492e-05, - "loss": 0.6984, - "step": 207760 - }, - { - "epoch": 1.836754539507417, - "grad_norm": 2.162914752960205, - "learning_rate": 1.938742434154305e-05, - "loss": 0.5041, - "step": 207770 - }, - { - "epoch": 1.8368429427677293, - "grad_norm": 1.7087122201919556, - "learning_rate": 1.9385950953871178e-05, - "loss": 0.5383, - "step": 207780 - }, - { - "epoch": 1.8369313460280416, - "grad_norm": 2.3233726024627686, - "learning_rate": 1.9384477566199307e-05, - "loss": 0.55, - "step": 207790 - }, - { - "epoch": 1.8370197492883538, - "grad_norm": 1.1416159868240356, - "learning_rate": 1.938300417852744e-05, - "loss": 0.6024, - "step": 207800 - }, - { - "epoch": 1.8371081525486659, - "grad_norm": 2.158411979675293, - "learning_rate": 1.9381530790855567e-05, - "loss": 0.5108, - "step": 207810 - }, - { - "epoch": 1.8371965558089782, - "grad_norm": 1.3921183347702026, - "learning_rate": 1.9380057403183695e-05, - "loss": 0.604, - "step": 207820 - }, - { - "epoch": 1.8372849590692906, - "grad_norm": 1.9411571025848389, - "learning_rate": 1.9378584015511827e-05, - "loss": 0.6256, - "step": 207830 - }, - { - "epoch": 1.8373733623296027, - "grad_norm": 1.8781229257583618, - "learning_rate": 1.9377110627839955e-05, - "loss": 0.5606, - "step": 207840 - }, - { - "epoch": 1.8374617655899148, - "grad_norm": 9.537175178527832, - "learning_rate": 1.9375637240168084e-05, - "loss": 0.5402, - "step": 207850 - }, - { - "epoch": 1.8375501688502272, - "grad_norm": 3.0181829929351807, - "learning_rate": 1.9374163852496212e-05, - "loss": 0.6142, - "step": 207860 - }, - { - "epoch": 1.8376385721105395, - "grad_norm": 11.303926467895508, - "learning_rate": 1.9372690464824344e-05, - "loss": 0.544, - "step": 207870 - }, - { - "epoch": 1.8377269753708516, - "grad_norm": 4.462282180786133, - "learning_rate": 1.9371217077152472e-05, - "loss": 0.6198, - "step": 207880 - }, - { - "epoch": 1.837815378631164, - "grad_norm": 6.00845193862915, - "learning_rate": 1.93697436894806e-05, - "loss": 0.5571, - "step": 207890 - }, - { - "epoch": 1.8379037818914763, - "grad_norm": 2.0074462890625, - "learning_rate": 1.936827030180873e-05, - "loss": 0.6813, - "step": 207900 - }, - { - "epoch": 1.8379921851517884, - "grad_norm": 2.754859447479248, - "learning_rate": 1.936679691413686e-05, - "loss": 0.6652, - "step": 207910 - }, - { - "epoch": 1.8380805884121005, - "grad_norm": 4.443384647369385, - "learning_rate": 1.936532352646499e-05, - "loss": 0.6517, - "step": 207920 - }, - { - "epoch": 1.8381689916724129, - "grad_norm": 2.1095924377441406, - "learning_rate": 1.9363850138793117e-05, - "loss": 0.6979, - "step": 207930 - }, - { - "epoch": 1.8382573949327252, - "grad_norm": 1.8302702903747559, - "learning_rate": 1.936237675112125e-05, - "loss": 0.506, - "step": 207940 - }, - { - "epoch": 1.8383457981930373, - "grad_norm": 0.7646289467811584, - "learning_rate": 1.9360903363449377e-05, - "loss": 0.5938, - "step": 207950 - }, - { - "epoch": 1.8384342014533495, - "grad_norm": 5.645435810089111, - "learning_rate": 1.9359429975777506e-05, - "loss": 0.5905, - "step": 207960 - }, - { - "epoch": 1.8385226047136618, - "grad_norm": 3.4635891914367676, - "learning_rate": 1.9357956588105637e-05, - "loss": 0.6362, - "step": 207970 - }, - { - "epoch": 1.8386110079739741, - "grad_norm": 14.734624862670898, - "learning_rate": 1.9356483200433766e-05, - "loss": 0.712, - "step": 207980 - }, - { - "epoch": 1.8386994112342863, - "grad_norm": 2.7411723136901855, - "learning_rate": 1.9355009812761894e-05, - "loss": 0.5717, - "step": 207990 - }, - { - "epoch": 1.8387878144945986, - "grad_norm": 3.2438783645629883, - "learning_rate": 1.9353536425090026e-05, - "loss": 0.5634, - "step": 208000 - }, - { - "epoch": 1.838876217754911, - "grad_norm": 3.8390941619873047, - "learning_rate": 1.9352063037418154e-05, - "loss": 0.5358, - "step": 208010 - }, - { - "epoch": 1.838964621015223, - "grad_norm": 2.427786350250244, - "learning_rate": 1.9350589649746283e-05, - "loss": 0.5674, - "step": 208020 - }, - { - "epoch": 1.8390530242755352, - "grad_norm": 12.458891868591309, - "learning_rate": 1.9349116262074414e-05, - "loss": 0.6475, - "step": 208030 - }, - { - "epoch": 1.8391414275358475, - "grad_norm": 1.6757439374923706, - "learning_rate": 1.9347642874402543e-05, - "loss": 0.5057, - "step": 208040 - }, - { - "epoch": 1.8392298307961599, - "grad_norm": 3.8869035243988037, - "learning_rate": 1.934616948673067e-05, - "loss": 0.5077, - "step": 208050 - }, - { - "epoch": 1.839318234056472, - "grad_norm": 1.2677150964736938, - "learning_rate": 1.9344696099058803e-05, - "loss": 0.6241, - "step": 208060 - }, - { - "epoch": 1.839406637316784, - "grad_norm": 1.1207103729248047, - "learning_rate": 1.934322271138693e-05, - "loss": 0.5549, - "step": 208070 - }, - { - "epoch": 1.8394950405770965, - "grad_norm": 1.5252139568328857, - "learning_rate": 1.934174932371506e-05, - "loss": 0.575, - "step": 208080 - }, - { - "epoch": 1.8395834438374088, - "grad_norm": 2.096590042114258, - "learning_rate": 1.934027593604319e-05, - "loss": 0.4791, - "step": 208090 - }, - { - "epoch": 1.839671847097721, - "grad_norm": 11.018218040466309, - "learning_rate": 1.933880254837132e-05, - "loss": 0.5244, - "step": 208100 - }, - { - "epoch": 1.8397602503580333, - "grad_norm": 8.555094718933105, - "learning_rate": 1.9337329160699448e-05, - "loss": 0.5983, - "step": 208110 - }, - { - "epoch": 1.8398486536183456, - "grad_norm": 2.8810555934906006, - "learning_rate": 1.933585577302758e-05, - "loss": 0.591, - "step": 208120 - }, - { - "epoch": 1.8399370568786577, - "grad_norm": 3.159071207046509, - "learning_rate": 1.9334382385355708e-05, - "loss": 0.704, - "step": 208130 - }, - { - "epoch": 1.8400254601389698, - "grad_norm": 3.9344112873077393, - "learning_rate": 1.9332908997683836e-05, - "loss": 0.7072, - "step": 208140 - }, - { - "epoch": 1.8401138633992822, - "grad_norm": 1.484507441520691, - "learning_rate": 1.9331435610011965e-05, - "loss": 0.6429, - "step": 208150 - }, - { - "epoch": 1.8402022666595945, - "grad_norm": 2.2243213653564453, - "learning_rate": 1.9329962222340096e-05, - "loss": 0.4745, - "step": 208160 - }, - { - "epoch": 1.8402906699199066, - "grad_norm": 3.3667314052581787, - "learning_rate": 1.9328488834668225e-05, - "loss": 0.5799, - "step": 208170 - }, - { - "epoch": 1.8403790731802188, - "grad_norm": 2.562797784805298, - "learning_rate": 1.9327015446996353e-05, - "loss": 0.6137, - "step": 208180 - }, - { - "epoch": 1.840467476440531, - "grad_norm": 3.30324649810791, - "learning_rate": 1.9325542059324485e-05, - "loss": 0.6696, - "step": 208190 - }, - { - "epoch": 1.8405558797008434, - "grad_norm": 2.3900535106658936, - "learning_rate": 1.9324068671652613e-05, - "loss": 0.5436, - "step": 208200 - }, - { - "epoch": 1.8406442829611556, - "grad_norm": 1.4462683200836182, - "learning_rate": 1.932259528398074e-05, - "loss": 0.5414, - "step": 208210 - }, - { - "epoch": 1.8407326862214677, - "grad_norm": 3.147698402404785, - "learning_rate": 1.932112189630887e-05, - "loss": 0.7038, - "step": 208220 - }, - { - "epoch": 1.8408210894817802, - "grad_norm": 1.601162314414978, - "learning_rate": 1.9319648508637e-05, - "loss": 0.62, - "step": 208230 - }, - { - "epoch": 1.8409094927420924, - "grad_norm": 2.883354902267456, - "learning_rate": 1.931817512096513e-05, - "loss": 0.6798, - "step": 208240 - }, - { - "epoch": 1.8409978960024045, - "grad_norm": 1.8171663284301758, - "learning_rate": 1.931670173329326e-05, - "loss": 0.6214, - "step": 208250 - }, - { - "epoch": 1.8410862992627168, - "grad_norm": 7.812248706817627, - "learning_rate": 1.9315228345621387e-05, - "loss": 0.5539, - "step": 208260 - }, - { - "epoch": 1.8411747025230292, - "grad_norm": 6.320882320404053, - "learning_rate": 1.931375495794952e-05, - "loss": 0.6669, - "step": 208270 - }, - { - "epoch": 1.8412631057833413, - "grad_norm": 1.2448458671569824, - "learning_rate": 1.9312281570277647e-05, - "loss": 0.5023, - "step": 208280 - }, - { - "epoch": 1.8413515090436534, - "grad_norm": 8.441811561584473, - "learning_rate": 1.9310808182605775e-05, - "loss": 0.472, - "step": 208290 - }, - { - "epoch": 1.8414399123039658, - "grad_norm": 3.837329387664795, - "learning_rate": 1.9309334794933907e-05, - "loss": 0.549, - "step": 208300 - }, - { - "epoch": 1.841528315564278, - "grad_norm": 2.837460517883301, - "learning_rate": 1.9307861407262035e-05, - "loss": 0.6207, - "step": 208310 - }, - { - "epoch": 1.8416167188245902, - "grad_norm": 1.691310167312622, - "learning_rate": 1.9306388019590164e-05, - "loss": 0.5241, - "step": 208320 - }, - { - "epoch": 1.8417051220849023, - "grad_norm": 1.5407449007034302, - "learning_rate": 1.9304914631918292e-05, - "loss": 0.5545, - "step": 208330 - }, - { - "epoch": 1.8417935253452147, - "grad_norm": 9.079808235168457, - "learning_rate": 1.9303441244246424e-05, - "loss": 0.5403, - "step": 208340 - }, - { - "epoch": 1.841881928605527, - "grad_norm": 3.774881601333618, - "learning_rate": 1.9301967856574552e-05, - "loss": 0.5198, - "step": 208350 - }, - { - "epoch": 1.8419703318658391, - "grad_norm": 3.2473204135894775, - "learning_rate": 1.930049446890268e-05, - "loss": 0.5796, - "step": 208360 - }, - { - "epoch": 1.8420587351261515, - "grad_norm": 2.596795082092285, - "learning_rate": 1.9299021081230812e-05, - "loss": 0.6532, - "step": 208370 - }, - { - "epoch": 1.8421471383864638, - "grad_norm": 1.7869517803192139, - "learning_rate": 1.929754769355894e-05, - "loss": 0.6459, - "step": 208380 - }, - { - "epoch": 1.842235541646776, - "grad_norm": 1.2131072282791138, - "learning_rate": 1.929607430588707e-05, - "loss": 0.4164, - "step": 208390 - }, - { - "epoch": 1.842323944907088, - "grad_norm": 0.9625169038772583, - "learning_rate": 1.9294600918215197e-05, - "loss": 0.529, - "step": 208400 - }, - { - "epoch": 1.8424123481674004, - "grad_norm": 6.193233966827393, - "learning_rate": 1.929312753054333e-05, - "loss": 0.6566, - "step": 208410 - }, - { - "epoch": 1.8425007514277127, - "grad_norm": 3.986889600753784, - "learning_rate": 1.9291654142871457e-05, - "loss": 0.56, - "step": 208420 - }, - { - "epoch": 1.8425891546880249, - "grad_norm": 8.127528190612793, - "learning_rate": 1.9290180755199586e-05, - "loss": 0.546, - "step": 208430 - }, - { - "epoch": 1.842677557948337, - "grad_norm": 1.231967568397522, - "learning_rate": 1.9288707367527714e-05, - "loss": 0.492, - "step": 208440 - }, - { - "epoch": 1.8427659612086493, - "grad_norm": 5.981503486633301, - "learning_rate": 1.9287233979855846e-05, - "loss": 0.6479, - "step": 208450 - }, - { - "epoch": 1.8428543644689617, - "grad_norm": 9.92072868347168, - "learning_rate": 1.9285760592183974e-05, - "loss": 0.6576, - "step": 208460 - }, - { - "epoch": 1.8429427677292738, - "grad_norm": 2.594308853149414, - "learning_rate": 1.9284287204512102e-05, - "loss": 0.5538, - "step": 208470 - }, - { - "epoch": 1.8430311709895861, - "grad_norm": 2.1643831729888916, - "learning_rate": 1.9282813816840234e-05, - "loss": 0.7178, - "step": 208480 - }, - { - "epoch": 1.8431195742498985, - "grad_norm": 2.940063714981079, - "learning_rate": 1.9281340429168363e-05, - "loss": 0.5861, - "step": 208490 - }, - { - "epoch": 1.8432079775102106, - "grad_norm": 3.8470075130462646, - "learning_rate": 1.927986704149649e-05, - "loss": 0.5032, - "step": 208500 - }, - { - "epoch": 1.8432963807705227, - "grad_norm": 4.685696125030518, - "learning_rate": 1.927839365382462e-05, - "loss": 0.5485, - "step": 208510 - }, - { - "epoch": 1.843384784030835, - "grad_norm": 6.664670467376709, - "learning_rate": 1.927692026615275e-05, - "loss": 0.7401, - "step": 208520 - }, - { - "epoch": 1.8434731872911474, - "grad_norm": 3.396160840988159, - "learning_rate": 1.927544687848088e-05, - "loss": 0.4842, - "step": 208530 - }, - { - "epoch": 1.8435615905514595, - "grad_norm": 1.1683683395385742, - "learning_rate": 1.9273973490809008e-05, - "loss": 0.5451, - "step": 208540 - }, - { - "epoch": 1.8436499938117716, - "grad_norm": 6.246324062347412, - "learning_rate": 1.9272500103137136e-05, - "loss": 0.5874, - "step": 208550 - }, - { - "epoch": 1.843738397072084, - "grad_norm": 2.184767961502075, - "learning_rate": 1.9271026715465268e-05, - "loss": 0.5323, - "step": 208560 - }, - { - "epoch": 1.8438268003323963, - "grad_norm": 2.2570149898529053, - "learning_rate": 1.9269553327793396e-05, - "loss": 0.6178, - "step": 208570 - }, - { - "epoch": 1.8439152035927084, - "grad_norm": 9.028661727905273, - "learning_rate": 1.9268079940121525e-05, - "loss": 0.479, - "step": 208580 - }, - { - "epoch": 1.8440036068530208, - "grad_norm": 1.981824278831482, - "learning_rate": 1.9266606552449656e-05, - "loss": 0.7059, - "step": 208590 - }, - { - "epoch": 1.8440920101133331, - "grad_norm": 1.6644154787063599, - "learning_rate": 1.9265133164777785e-05, - "loss": 0.6297, - "step": 208600 - }, - { - "epoch": 1.8441804133736452, - "grad_norm": 2.061619520187378, - "learning_rate": 1.9263659777105913e-05, - "loss": 0.4998, - "step": 208610 - }, - { - "epoch": 1.8442688166339574, - "grad_norm": 2.6741526126861572, - "learning_rate": 1.926218638943404e-05, - "loss": 0.6671, - "step": 208620 - }, - { - "epoch": 1.8443572198942697, - "grad_norm": 3.8932957649230957, - "learning_rate": 1.9260713001762173e-05, - "loss": 0.5225, - "step": 208630 - }, - { - "epoch": 1.844445623154582, - "grad_norm": 2.195492744445801, - "learning_rate": 1.92592396140903e-05, - "loss": 0.6988, - "step": 208640 - }, - { - "epoch": 1.8445340264148942, - "grad_norm": 2.725295305252075, - "learning_rate": 1.925776622641843e-05, - "loss": 0.4874, - "step": 208650 - }, - { - "epoch": 1.8446224296752063, - "grad_norm": 4.1461100578308105, - "learning_rate": 1.925629283874656e-05, - "loss": 0.4211, - "step": 208660 - }, - { - "epoch": 1.8447108329355186, - "grad_norm": 4.067321300506592, - "learning_rate": 1.925481945107469e-05, - "loss": 0.6168, - "step": 208670 - }, - { - "epoch": 1.844799236195831, - "grad_norm": 2.246029853820801, - "learning_rate": 1.9253346063402818e-05, - "loss": 0.5897, - "step": 208680 - }, - { - "epoch": 1.844887639456143, - "grad_norm": 1.4188412427902222, - "learning_rate": 1.9251872675730947e-05, - "loss": 0.5161, - "step": 208690 - }, - { - "epoch": 1.8449760427164554, - "grad_norm": 1.4774802923202515, - "learning_rate": 1.925039928805908e-05, - "loss": 0.5898, - "step": 208700 - }, - { - "epoch": 1.8450644459767678, - "grad_norm": 1.8297480344772339, - "learning_rate": 1.9248925900387207e-05, - "loss": 0.4712, - "step": 208710 - }, - { - "epoch": 1.84515284923708, - "grad_norm": 2.1271655559539795, - "learning_rate": 1.9247452512715335e-05, - "loss": 0.4736, - "step": 208720 - }, - { - "epoch": 1.845241252497392, - "grad_norm": 3.366549253463745, - "learning_rate": 1.9245979125043463e-05, - "loss": 0.4887, - "step": 208730 - }, - { - "epoch": 1.8453296557577044, - "grad_norm": 6.425593376159668, - "learning_rate": 1.9244505737371595e-05, - "loss": 0.489, - "step": 208740 - }, - { - "epoch": 1.8454180590180167, - "grad_norm": 2.044172525405884, - "learning_rate": 1.9243032349699723e-05, - "loss": 0.6317, - "step": 208750 - }, - { - "epoch": 1.8455064622783288, - "grad_norm": 1.8842204809188843, - "learning_rate": 1.9241558962027852e-05, - "loss": 0.4849, - "step": 208760 - }, - { - "epoch": 1.845594865538641, - "grad_norm": 4.034019947052002, - "learning_rate": 1.9240085574355984e-05, - "loss": 0.6941, - "step": 208770 - }, - { - "epoch": 1.8456832687989533, - "grad_norm": 1.5998018980026245, - "learning_rate": 1.9238612186684112e-05, - "loss": 0.4624, - "step": 208780 - }, - { - "epoch": 1.8457716720592656, - "grad_norm": 1.3839768171310425, - "learning_rate": 1.923713879901224e-05, - "loss": 0.6336, - "step": 208790 - }, - { - "epoch": 1.8458600753195777, - "grad_norm": 14.206204414367676, - "learning_rate": 1.923566541134037e-05, - "loss": 0.6308, - "step": 208800 - }, - { - "epoch": 1.8459484785798899, - "grad_norm": 5.289737224578857, - "learning_rate": 1.92341920236685e-05, - "loss": 0.6465, - "step": 208810 - }, - { - "epoch": 1.8460368818402024, - "grad_norm": 5.853087425231934, - "learning_rate": 1.923271863599663e-05, - "loss": 0.6347, - "step": 208820 - }, - { - "epoch": 1.8461252851005145, - "grad_norm": 1.3876986503601074, - "learning_rate": 1.9231245248324757e-05, - "loss": 0.4464, - "step": 208830 - }, - { - "epoch": 1.8462136883608267, - "grad_norm": 1.8678480386734009, - "learning_rate": 1.922977186065289e-05, - "loss": 0.5254, - "step": 208840 - }, - { - "epoch": 1.846302091621139, - "grad_norm": 4.040596008300781, - "learning_rate": 1.9228298472981017e-05, - "loss": 0.6068, - "step": 208850 - }, - { - "epoch": 1.8463904948814513, - "grad_norm": 15.859367370605469, - "learning_rate": 1.9226825085309146e-05, - "loss": 0.573, - "step": 208860 - }, - { - "epoch": 1.8464788981417635, - "grad_norm": 2.2348434925079346, - "learning_rate": 1.9225351697637274e-05, - "loss": 0.7017, - "step": 208870 - }, - { - "epoch": 1.8465673014020756, - "grad_norm": 2.014366865158081, - "learning_rate": 1.9223878309965406e-05, - "loss": 0.6181, - "step": 208880 - }, - { - "epoch": 1.846655704662388, - "grad_norm": 2.2384707927703857, - "learning_rate": 1.9222404922293534e-05, - "loss": 0.6106, - "step": 208890 - }, - { - "epoch": 1.8467441079227003, - "grad_norm": 2.4331438541412354, - "learning_rate": 1.9220931534621662e-05, - "loss": 0.5193, - "step": 208900 - }, - { - "epoch": 1.8468325111830124, - "grad_norm": 0.9146058559417725, - "learning_rate": 1.9219458146949794e-05, - "loss": 0.448, - "step": 208910 - }, - { - "epoch": 1.8469209144433245, - "grad_norm": 1.7593638896942139, - "learning_rate": 1.9217984759277922e-05, - "loss": 0.7223, - "step": 208920 - }, - { - "epoch": 1.8470093177036369, - "grad_norm": 7.308826446533203, - "learning_rate": 1.921651137160605e-05, - "loss": 0.7488, - "step": 208930 - }, - { - "epoch": 1.8470977209639492, - "grad_norm": 3.9544637203216553, - "learning_rate": 1.9215037983934183e-05, - "loss": 0.6527, - "step": 208940 - }, - { - "epoch": 1.8471861242242613, - "grad_norm": 6.821430206298828, - "learning_rate": 1.921356459626231e-05, - "loss": 0.5304, - "step": 208950 - }, - { - "epoch": 1.8472745274845737, - "grad_norm": 3.524042844772339, - "learning_rate": 1.921209120859044e-05, - "loss": 0.7257, - "step": 208960 - }, - { - "epoch": 1.847362930744886, - "grad_norm": 3.3266422748565674, - "learning_rate": 1.921061782091857e-05, - "loss": 0.5185, - "step": 208970 - }, - { - "epoch": 1.8474513340051981, - "grad_norm": 7.193199634552002, - "learning_rate": 1.92091444332467e-05, - "loss": 0.4839, - "step": 208980 - }, - { - "epoch": 1.8475397372655102, - "grad_norm": 4.568263530731201, - "learning_rate": 1.9207671045574828e-05, - "loss": 0.5547, - "step": 208990 - }, - { - "epoch": 1.8476281405258226, - "grad_norm": 1.9425479173660278, - "learning_rate": 1.920619765790296e-05, - "loss": 0.5305, - "step": 209000 - }, - { - "epoch": 1.847716543786135, - "grad_norm": 8.574569702148438, - "learning_rate": 1.9204724270231088e-05, - "loss": 0.5875, - "step": 209010 - }, - { - "epoch": 1.847804947046447, - "grad_norm": 2.6623642444610596, - "learning_rate": 1.9203250882559216e-05, - "loss": 0.6051, - "step": 209020 - }, - { - "epoch": 1.8478933503067592, - "grad_norm": 21.262672424316406, - "learning_rate": 1.9201777494887348e-05, - "loss": 0.5521, - "step": 209030 - }, - { - "epoch": 1.8479817535670715, - "grad_norm": 4.10367488861084, - "learning_rate": 1.9200304107215476e-05, - "loss": 0.6743, - "step": 209040 - }, - { - "epoch": 1.8480701568273838, - "grad_norm": 2.1683733463287354, - "learning_rate": 1.9198830719543605e-05, - "loss": 0.6803, - "step": 209050 - }, - { - "epoch": 1.848158560087696, - "grad_norm": 2.451362371444702, - "learning_rate": 1.9197357331871736e-05, - "loss": 0.5931, - "step": 209060 - }, - { - "epoch": 1.8482469633480083, - "grad_norm": 1.2647017240524292, - "learning_rate": 1.9195883944199865e-05, - "loss": 0.5594, - "step": 209070 - }, - { - "epoch": 1.8483353666083207, - "grad_norm": 2.7430880069732666, - "learning_rate": 1.9194410556527993e-05, - "loss": 0.5378, - "step": 209080 - }, - { - "epoch": 1.8484237698686328, - "grad_norm": 3.733816623687744, - "learning_rate": 1.919293716885612e-05, - "loss": 0.5103, - "step": 209090 - }, - { - "epoch": 1.848512173128945, - "grad_norm": 2.061666488647461, - "learning_rate": 1.9191463781184253e-05, - "loss": 0.6082, - "step": 209100 - }, - { - "epoch": 1.8486005763892572, - "grad_norm": 1.3504884243011475, - "learning_rate": 1.918999039351238e-05, - "loss": 0.4425, - "step": 209110 - }, - { - "epoch": 1.8486889796495696, - "grad_norm": 1.0352938175201416, - "learning_rate": 1.918851700584051e-05, - "loss": 0.5305, - "step": 209120 - }, - { - "epoch": 1.8487773829098817, - "grad_norm": 3.7901957035064697, - "learning_rate": 1.918704361816864e-05, - "loss": 0.5637, - "step": 209130 - }, - { - "epoch": 1.8488657861701938, - "grad_norm": 3.6319451332092285, - "learning_rate": 1.918557023049677e-05, - "loss": 0.521, - "step": 209140 - }, - { - "epoch": 1.8489541894305062, - "grad_norm": 1.8487882614135742, - "learning_rate": 1.91840968428249e-05, - "loss": 0.6178, - "step": 209150 - }, - { - "epoch": 1.8490425926908185, - "grad_norm": 2.701673746109009, - "learning_rate": 1.9182623455153027e-05, - "loss": 0.5902, - "step": 209160 - }, - { - "epoch": 1.8491309959511306, - "grad_norm": 1.8205074071884155, - "learning_rate": 1.918115006748116e-05, - "loss": 0.6549, - "step": 209170 - }, - { - "epoch": 1.849219399211443, - "grad_norm": 2.135723352432251, - "learning_rate": 1.9179676679809287e-05, - "loss": 0.4657, - "step": 209180 - }, - { - "epoch": 1.8493078024717553, - "grad_norm": 2.154141664505005, - "learning_rate": 1.9178203292137415e-05, - "loss": 0.5167, - "step": 209190 - }, - { - "epoch": 1.8493962057320674, - "grad_norm": 2.4006640911102295, - "learning_rate": 1.9176729904465543e-05, - "loss": 0.5883, - "step": 209200 - }, - { - "epoch": 1.8494846089923795, - "grad_norm": 3.1476542949676514, - "learning_rate": 1.9175256516793675e-05, - "loss": 0.5373, - "step": 209210 - }, - { - "epoch": 1.8495730122526919, - "grad_norm": 2.0557191371917725, - "learning_rate": 1.9173783129121804e-05, - "loss": 0.5275, - "step": 209220 - }, - { - "epoch": 1.8496614155130042, - "grad_norm": 2.2931723594665527, - "learning_rate": 1.9172309741449932e-05, - "loss": 0.6184, - "step": 209230 - }, - { - "epoch": 1.8497498187733163, - "grad_norm": 4.016658782958984, - "learning_rate": 1.9170836353778064e-05, - "loss": 0.6319, - "step": 209240 - }, - { - "epoch": 1.8498382220336285, - "grad_norm": 1.1208021640777588, - "learning_rate": 1.9169362966106192e-05, - "loss": 0.5133, - "step": 209250 - }, - { - "epoch": 1.8499266252939408, - "grad_norm": 1.048937439918518, - "learning_rate": 1.916788957843432e-05, - "loss": 0.6012, - "step": 209260 - }, - { - "epoch": 1.8500150285542531, - "grad_norm": 1.1958234310150146, - "learning_rate": 1.916641619076245e-05, - "loss": 0.6033, - "step": 209270 - }, - { - "epoch": 1.8501034318145653, - "grad_norm": 19.80698013305664, - "learning_rate": 1.916494280309058e-05, - "loss": 0.5972, - "step": 209280 - }, - { - "epoch": 1.8501918350748776, - "grad_norm": 2.8051559925079346, - "learning_rate": 1.916346941541871e-05, - "loss": 0.592, - "step": 209290 - }, - { - "epoch": 1.85028023833519, - "grad_norm": 1.0027694702148438, - "learning_rate": 1.9161996027746837e-05, - "loss": 0.5449, - "step": 209300 - }, - { - "epoch": 1.850368641595502, - "grad_norm": 11.03569221496582, - "learning_rate": 1.916052264007497e-05, - "loss": 0.5831, - "step": 209310 - }, - { - "epoch": 1.8504570448558142, - "grad_norm": 2.9402170181274414, - "learning_rate": 1.9159049252403097e-05, - "loss": 0.6162, - "step": 209320 - }, - { - "epoch": 1.8505454481161265, - "grad_norm": 10.135941505432129, - "learning_rate": 1.9157575864731226e-05, - "loss": 0.6953, - "step": 209330 - }, - { - "epoch": 1.8506338513764389, - "grad_norm": 13.804825782775879, - "learning_rate": 1.9156102477059354e-05, - "loss": 0.514, - "step": 209340 - }, - { - "epoch": 1.850722254636751, - "grad_norm": 2.008164644241333, - "learning_rate": 1.9154629089387486e-05, - "loss": 0.4948, - "step": 209350 - }, - { - "epoch": 1.8508106578970631, - "grad_norm": 1.5638909339904785, - "learning_rate": 1.9153155701715614e-05, - "loss": 0.6244, - "step": 209360 - }, - { - "epoch": 1.8508990611573755, - "grad_norm": 4.650651931762695, - "learning_rate": 1.9151682314043742e-05, - "loss": 0.677, - "step": 209370 - }, - { - "epoch": 1.8509874644176878, - "grad_norm": 1.0625505447387695, - "learning_rate": 1.915020892637187e-05, - "loss": 0.5537, - "step": 209380 - }, - { - "epoch": 1.851075867678, - "grad_norm": 1.3145408630371094, - "learning_rate": 1.9148735538700003e-05, - "loss": 0.5965, - "step": 209390 - }, - { - "epoch": 1.851164270938312, - "grad_norm": 1.9740639925003052, - "learning_rate": 1.914726215102813e-05, - "loss": 0.5444, - "step": 209400 - }, - { - "epoch": 1.8512526741986246, - "grad_norm": 2.028446912765503, - "learning_rate": 1.914578876335626e-05, - "loss": 0.6332, - "step": 209410 - }, - { - "epoch": 1.8513410774589367, - "grad_norm": 2.316175699234009, - "learning_rate": 1.914431537568439e-05, - "loss": 0.6294, - "step": 209420 - }, - { - "epoch": 1.8514294807192488, - "grad_norm": 3.143463373184204, - "learning_rate": 1.914284198801252e-05, - "loss": 0.5261, - "step": 209430 - }, - { - "epoch": 1.8515178839795612, - "grad_norm": 2.866745710372925, - "learning_rate": 1.9141368600340648e-05, - "loss": 0.4925, - "step": 209440 - }, - { - "epoch": 1.8516062872398735, - "grad_norm": 4.782702445983887, - "learning_rate": 1.9139895212668776e-05, - "loss": 0.68, - "step": 209450 - }, - { - "epoch": 1.8516946905001856, - "grad_norm": 6.505615234375, - "learning_rate": 1.9138421824996908e-05, - "loss": 0.6842, - "step": 209460 - }, - { - "epoch": 1.8517830937604978, - "grad_norm": 0.8906887769699097, - "learning_rate": 1.9136948437325036e-05, - "loss": 0.5339, - "step": 209470 - }, - { - "epoch": 1.85187149702081, - "grad_norm": 1.8407458066940308, - "learning_rate": 1.9135475049653164e-05, - "loss": 0.6347, - "step": 209480 - }, - { - "epoch": 1.8519599002811225, - "grad_norm": 3.4316773414611816, - "learning_rate": 1.9134001661981293e-05, - "loss": 0.4858, - "step": 209490 - }, - { - "epoch": 1.8520483035414346, - "grad_norm": 2.7154715061187744, - "learning_rate": 1.9132528274309425e-05, - "loss": 0.7528, - "step": 209500 - }, - { - "epoch": 1.8521367068017467, - "grad_norm": 2.3810553550720215, - "learning_rate": 1.9131054886637553e-05, - "loss": 0.675, - "step": 209510 - }, - { - "epoch": 1.8522251100620593, - "grad_norm": 2.987447500228882, - "learning_rate": 1.912958149896568e-05, - "loss": 0.6207, - "step": 209520 - }, - { - "epoch": 1.8523135133223714, - "grad_norm": 1.906565546989441, - "learning_rate": 1.9128108111293813e-05, - "loss": 0.5548, - "step": 209530 - }, - { - "epoch": 1.8524019165826835, - "grad_norm": 1.5837750434875488, - "learning_rate": 1.912663472362194e-05, - "loss": 0.4597, - "step": 209540 - }, - { - "epoch": 1.8524903198429958, - "grad_norm": 4.482527732849121, - "learning_rate": 1.912516133595007e-05, - "loss": 0.6536, - "step": 209550 - }, - { - "epoch": 1.8525787231033082, - "grad_norm": 1.0739617347717285, - "learning_rate": 1.9123687948278198e-05, - "loss": 0.5747, - "step": 209560 - }, - { - "epoch": 1.8526671263636203, - "grad_norm": 3.541281223297119, - "learning_rate": 1.912221456060633e-05, - "loss": 0.6488, - "step": 209570 - }, - { - "epoch": 1.8527555296239324, - "grad_norm": 2.5793895721435547, - "learning_rate": 1.9120741172934458e-05, - "loss": 0.5013, - "step": 209580 - }, - { - "epoch": 1.8528439328842448, - "grad_norm": 1.0419985055923462, - "learning_rate": 1.9119267785262587e-05, - "loss": 0.656, - "step": 209590 - }, - { - "epoch": 1.852932336144557, - "grad_norm": 4.573622226715088, - "learning_rate": 1.9117794397590718e-05, - "loss": 0.6409, - "step": 209600 - }, - { - "epoch": 1.8530207394048692, - "grad_norm": 2.8321332931518555, - "learning_rate": 1.9116321009918847e-05, - "loss": 0.5855, - "step": 209610 - }, - { - "epoch": 1.8531091426651813, - "grad_norm": 1.3490846157073975, - "learning_rate": 1.9114847622246975e-05, - "loss": 0.4741, - "step": 209620 - }, - { - "epoch": 1.8531975459254937, - "grad_norm": 2.3741469383239746, - "learning_rate": 1.9113374234575103e-05, - "loss": 0.5789, - "step": 209630 - }, - { - "epoch": 1.853285949185806, - "grad_norm": 4.7944841384887695, - "learning_rate": 1.9111900846903235e-05, - "loss": 0.4586, - "step": 209640 - }, - { - "epoch": 1.8533743524461181, - "grad_norm": 1.7278589010238647, - "learning_rate": 1.9110427459231363e-05, - "loss": 0.4589, - "step": 209650 - }, - { - "epoch": 1.8534627557064305, - "grad_norm": 2.2020726203918457, - "learning_rate": 1.9108954071559492e-05, - "loss": 0.6328, - "step": 209660 - }, - { - "epoch": 1.8535511589667428, - "grad_norm": 15.768167495727539, - "learning_rate": 1.910748068388762e-05, - "loss": 0.6843, - "step": 209670 - }, - { - "epoch": 1.853639562227055, - "grad_norm": 4.236842155456543, - "learning_rate": 1.9106007296215752e-05, - "loss": 0.5868, - "step": 209680 - }, - { - "epoch": 1.853727965487367, - "grad_norm": 4.5089640617370605, - "learning_rate": 1.910453390854388e-05, - "loss": 0.4748, - "step": 209690 - }, - { - "epoch": 1.8538163687476794, - "grad_norm": 3.02734112739563, - "learning_rate": 1.910306052087201e-05, - "loss": 0.6417, - "step": 209700 - }, - { - "epoch": 1.8539047720079918, - "grad_norm": 2.187847375869751, - "learning_rate": 1.910158713320014e-05, - "loss": 0.6094, - "step": 209710 - }, - { - "epoch": 1.8539931752683039, - "grad_norm": 2.2535903453826904, - "learning_rate": 1.910011374552827e-05, - "loss": 0.6177, - "step": 209720 - }, - { - "epoch": 1.854081578528616, - "grad_norm": 1.3434786796569824, - "learning_rate": 1.9098640357856397e-05, - "loss": 0.4544, - "step": 209730 - }, - { - "epoch": 1.8541699817889283, - "grad_norm": 16.987985610961914, - "learning_rate": 1.9097166970184525e-05, - "loss": 0.4421, - "step": 209740 - }, - { - "epoch": 1.8542583850492407, - "grad_norm": 1.3285064697265625, - "learning_rate": 1.9095693582512657e-05, - "loss": 0.5307, - "step": 209750 - }, - { - "epoch": 1.8543467883095528, - "grad_norm": 5.595705986022949, - "learning_rate": 1.9094220194840786e-05, - "loss": 0.5456, - "step": 209760 - }, - { - "epoch": 1.8544351915698651, - "grad_norm": 13.066436767578125, - "learning_rate": 1.9092746807168914e-05, - "loss": 0.4763, - "step": 209770 - }, - { - "epoch": 1.8545235948301775, - "grad_norm": 4.322331428527832, - "learning_rate": 1.9091273419497046e-05, - "loss": 0.4579, - "step": 209780 - }, - { - "epoch": 1.8546119980904896, - "grad_norm": 2.9211742877960205, - "learning_rate": 1.9089800031825174e-05, - "loss": 0.7345, - "step": 209790 - }, - { - "epoch": 1.8547004013508017, - "grad_norm": 2.2567601203918457, - "learning_rate": 1.9088326644153302e-05, - "loss": 0.6078, - "step": 209800 - }, - { - "epoch": 1.854788804611114, - "grad_norm": 3.767890453338623, - "learning_rate": 1.908685325648143e-05, - "loss": 0.7, - "step": 209810 - }, - { - "epoch": 1.8548772078714264, - "grad_norm": 2.635831117630005, - "learning_rate": 1.9085379868809562e-05, - "loss": 0.6286, - "step": 209820 - }, - { - "epoch": 1.8549656111317385, - "grad_norm": 2.658792734146118, - "learning_rate": 1.908390648113769e-05, - "loss": 0.6185, - "step": 209830 - }, - { - "epoch": 1.8550540143920506, - "grad_norm": 2.8339874744415283, - "learning_rate": 1.908243309346582e-05, - "loss": 0.6188, - "step": 209840 - }, - { - "epoch": 1.855142417652363, - "grad_norm": 1.5091569423675537, - "learning_rate": 1.908095970579395e-05, - "loss": 0.6727, - "step": 209850 - }, - { - "epoch": 1.8552308209126753, - "grad_norm": 2.574605941772461, - "learning_rate": 1.907948631812208e-05, - "loss": 0.6069, - "step": 209860 - }, - { - "epoch": 1.8553192241729874, - "grad_norm": 7.200460910797119, - "learning_rate": 1.9078012930450208e-05, - "loss": 0.5838, - "step": 209870 - }, - { - "epoch": 1.8554076274332998, - "grad_norm": 3.098416805267334, - "learning_rate": 1.907653954277834e-05, - "loss": 0.6059, - "step": 209880 - }, - { - "epoch": 1.8554960306936121, - "grad_norm": 3.291194438934326, - "learning_rate": 1.9075066155106468e-05, - "loss": 0.6237, - "step": 209890 - }, - { - "epoch": 1.8555844339539243, - "grad_norm": 1.4947134256362915, - "learning_rate": 1.9073592767434596e-05, - "loss": 0.5487, - "step": 209900 - }, - { - "epoch": 1.8556728372142364, - "grad_norm": 1.5824846029281616, - "learning_rate": 1.9072119379762728e-05, - "loss": 0.6922, - "step": 209910 - }, - { - "epoch": 1.8557612404745487, - "grad_norm": 2.8694868087768555, - "learning_rate": 1.9070645992090856e-05, - "loss": 0.449, - "step": 209920 - }, - { - "epoch": 1.855849643734861, - "grad_norm": 2.240093946456909, - "learning_rate": 1.9069172604418984e-05, - "loss": 0.6713, - "step": 209930 - }, - { - "epoch": 1.8559380469951732, - "grad_norm": 4.451633453369141, - "learning_rate": 1.9067699216747116e-05, - "loss": 0.4802, - "step": 209940 - }, - { - "epoch": 1.8560264502554853, - "grad_norm": 2.2405810356140137, - "learning_rate": 1.9066225829075245e-05, - "loss": 0.6286, - "step": 209950 - }, - { - "epoch": 1.8561148535157976, - "grad_norm": 2.9434351921081543, - "learning_rate": 1.9064752441403373e-05, - "loss": 0.6735, - "step": 209960 - }, - { - "epoch": 1.85620325677611, - "grad_norm": 9.438030242919922, - "learning_rate": 1.9063279053731505e-05, - "loss": 0.5048, - "step": 209970 - }, - { - "epoch": 1.856291660036422, - "grad_norm": 0.9189260601997375, - "learning_rate": 1.9061805666059633e-05, - "loss": 0.6372, - "step": 209980 - }, - { - "epoch": 1.8563800632967342, - "grad_norm": 2.6689350605010986, - "learning_rate": 1.906033227838776e-05, - "loss": 0.5439, - "step": 209990 - }, - { - "epoch": 1.8564684665570468, - "grad_norm": 2.3570046424865723, - "learning_rate": 1.9058858890715893e-05, - "loss": 0.7371, - "step": 210000 - }, - { - "epoch": 1.856556869817359, - "grad_norm": 3.1186368465423584, - "learning_rate": 1.905738550304402e-05, - "loss": 0.4917, - "step": 210010 - }, - { - "epoch": 1.856645273077671, - "grad_norm": 1.5461078882217407, - "learning_rate": 1.905591211537215e-05, - "loss": 0.6838, - "step": 210020 - }, - { - "epoch": 1.8567336763379834, - "grad_norm": 0.710598349571228, - "learning_rate": 1.9054438727700278e-05, - "loss": 0.5545, - "step": 210030 - }, - { - "epoch": 1.8568220795982957, - "grad_norm": 9.732213973999023, - "learning_rate": 1.905296534002841e-05, - "loss": 0.6883, - "step": 210040 - }, - { - "epoch": 1.8569104828586078, - "grad_norm": 1.8715342283248901, - "learning_rate": 1.9051491952356538e-05, - "loss": 0.5413, - "step": 210050 - }, - { - "epoch": 1.85699888611892, - "grad_norm": 7.001986026763916, - "learning_rate": 1.9050018564684667e-05, - "loss": 0.5849, - "step": 210060 - }, - { - "epoch": 1.8570872893792323, - "grad_norm": 2.913346290588379, - "learning_rate": 1.90485451770128e-05, - "loss": 0.5106, - "step": 210070 - }, - { - "epoch": 1.8571756926395446, - "grad_norm": 2.3131251335144043, - "learning_rate": 1.9047071789340927e-05, - "loss": 0.5835, - "step": 210080 - }, - { - "epoch": 1.8572640958998567, - "grad_norm": 5.071016788482666, - "learning_rate": 1.9045598401669055e-05, - "loss": 0.645, - "step": 210090 - }, - { - "epoch": 1.8573524991601689, - "grad_norm": 2.1975395679473877, - "learning_rate": 1.9044125013997183e-05, - "loss": 0.4592, - "step": 210100 - }, - { - "epoch": 1.8574409024204814, - "grad_norm": 1.8032022714614868, - "learning_rate": 1.9042651626325315e-05, - "loss": 0.7926, - "step": 210110 - }, - { - "epoch": 1.8575293056807936, - "grad_norm": 3.4323999881744385, - "learning_rate": 1.9041178238653444e-05, - "loss": 0.5741, - "step": 210120 - }, - { - "epoch": 1.8576177089411057, - "grad_norm": 2.3246331214904785, - "learning_rate": 1.9039704850981572e-05, - "loss": 0.5563, - "step": 210130 - }, - { - "epoch": 1.857706112201418, - "grad_norm": 2.060991048812866, - "learning_rate": 1.90382314633097e-05, - "loss": 0.544, - "step": 210140 - }, - { - "epoch": 1.8577945154617304, - "grad_norm": 4.852057456970215, - "learning_rate": 1.9036758075637832e-05, - "loss": 0.6653, - "step": 210150 - }, - { - "epoch": 1.8578829187220425, - "grad_norm": 27.585124969482422, - "learning_rate": 1.903528468796596e-05, - "loss": 0.6218, - "step": 210160 - }, - { - "epoch": 1.8579713219823546, - "grad_norm": 4.210274696350098, - "learning_rate": 1.903381130029409e-05, - "loss": 0.5708, - "step": 210170 - }, - { - "epoch": 1.858059725242667, - "grad_norm": 1.9030274152755737, - "learning_rate": 1.903233791262222e-05, - "loss": 0.4775, - "step": 210180 - }, - { - "epoch": 1.8581481285029793, - "grad_norm": 6.925843715667725, - "learning_rate": 1.903086452495035e-05, - "loss": 0.6747, - "step": 210190 - }, - { - "epoch": 1.8582365317632914, - "grad_norm": 1.128814935684204, - "learning_rate": 1.9029391137278477e-05, - "loss": 0.5575, - "step": 210200 - }, - { - "epoch": 1.8583249350236035, - "grad_norm": 3.0503857135772705, - "learning_rate": 1.9027917749606605e-05, - "loss": 0.6345, - "step": 210210 - }, - { - "epoch": 1.8584133382839159, - "grad_norm": 11.362192153930664, - "learning_rate": 1.9026444361934737e-05, - "loss": 0.5727, - "step": 210220 - }, - { - "epoch": 1.8585017415442282, - "grad_norm": 2.16227388381958, - "learning_rate": 1.9024970974262866e-05, - "loss": 0.4944, - "step": 210230 - }, - { - "epoch": 1.8585901448045403, - "grad_norm": 3.194079875946045, - "learning_rate": 1.9023497586590994e-05, - "loss": 0.644, - "step": 210240 - }, - { - "epoch": 1.8586785480648527, - "grad_norm": 1.760561466217041, - "learning_rate": 1.9022024198919126e-05, - "loss": 0.6066, - "step": 210250 - }, - { - "epoch": 1.858766951325165, - "grad_norm": 1.222104787826538, - "learning_rate": 1.9020550811247254e-05, - "loss": 0.5032, - "step": 210260 - }, - { - "epoch": 1.8588553545854771, - "grad_norm": 1.9924540519714355, - "learning_rate": 1.9019077423575382e-05, - "loss": 0.4528, - "step": 210270 - }, - { - "epoch": 1.8589437578457892, - "grad_norm": 1.879380702972412, - "learning_rate": 1.901760403590351e-05, - "loss": 0.6124, - "step": 210280 - }, - { - "epoch": 1.8590321611061016, - "grad_norm": 1.26064932346344, - "learning_rate": 1.9016130648231642e-05, - "loss": 0.5068, - "step": 210290 - }, - { - "epoch": 1.859120564366414, - "grad_norm": 1.4377273321151733, - "learning_rate": 1.901465726055977e-05, - "loss": 0.7046, - "step": 210300 - }, - { - "epoch": 1.859208967626726, - "grad_norm": 1.9383795261383057, - "learning_rate": 1.90131838728879e-05, - "loss": 0.6046, - "step": 210310 - }, - { - "epoch": 1.8592973708870382, - "grad_norm": 4.310141086578369, - "learning_rate": 1.9011710485216028e-05, - "loss": 0.6647, - "step": 210320 - }, - { - "epoch": 1.8593857741473505, - "grad_norm": 1.1649905443191528, - "learning_rate": 1.901023709754416e-05, - "loss": 0.5247, - "step": 210330 - }, - { - "epoch": 1.8594741774076629, - "grad_norm": 2.907787322998047, - "learning_rate": 1.9008763709872288e-05, - "loss": 0.5539, - "step": 210340 - }, - { - "epoch": 1.859562580667975, - "grad_norm": 4.768134593963623, - "learning_rate": 1.9007290322200416e-05, - "loss": 0.5305, - "step": 210350 - }, - { - "epoch": 1.8596509839282873, - "grad_norm": 2.1298699378967285, - "learning_rate": 1.9005816934528548e-05, - "loss": 0.6477, - "step": 210360 - }, - { - "epoch": 1.8597393871885997, - "grad_norm": 5.800304889678955, - "learning_rate": 1.9004343546856676e-05, - "loss": 0.568, - "step": 210370 - }, - { - "epoch": 1.8598277904489118, - "grad_norm": 4.090860366821289, - "learning_rate": 1.9002870159184804e-05, - "loss": 0.6223, - "step": 210380 - }, - { - "epoch": 1.859916193709224, - "grad_norm": 4.281871795654297, - "learning_rate": 1.9001396771512933e-05, - "loss": 0.5283, - "step": 210390 - }, - { - "epoch": 1.8600045969695362, - "grad_norm": 2.082164764404297, - "learning_rate": 1.8999923383841065e-05, - "loss": 0.7171, - "step": 210400 - }, - { - "epoch": 1.8600930002298486, - "grad_norm": 2.0262606143951416, - "learning_rate": 1.8998449996169193e-05, - "loss": 0.5147, - "step": 210410 - }, - { - "epoch": 1.8601814034901607, - "grad_norm": 6.9588470458984375, - "learning_rate": 1.899697660849732e-05, - "loss": 0.5693, - "step": 210420 - }, - { - "epoch": 1.8602698067504728, - "grad_norm": 0.828840970993042, - "learning_rate": 1.8995503220825453e-05, - "loss": 0.4583, - "step": 210430 - }, - { - "epoch": 1.8603582100107852, - "grad_norm": 1.6092866659164429, - "learning_rate": 1.899402983315358e-05, - "loss": 0.592, - "step": 210440 - }, - { - "epoch": 1.8604466132710975, - "grad_norm": 1.7956653833389282, - "learning_rate": 1.899255644548171e-05, - "loss": 0.6539, - "step": 210450 - }, - { - "epoch": 1.8605350165314096, - "grad_norm": 18.42283821105957, - "learning_rate": 1.8991083057809838e-05, - "loss": 0.5401, - "step": 210460 - }, - { - "epoch": 1.860623419791722, - "grad_norm": 0.954200804233551, - "learning_rate": 1.898960967013797e-05, - "loss": 0.5161, - "step": 210470 - }, - { - "epoch": 1.8607118230520343, - "grad_norm": 1.5571684837341309, - "learning_rate": 1.8988136282466098e-05, - "loss": 0.6886, - "step": 210480 - }, - { - "epoch": 1.8608002263123464, - "grad_norm": 2.7032861709594727, - "learning_rate": 1.8986662894794226e-05, - "loss": 0.5252, - "step": 210490 - }, - { - "epoch": 1.8608886295726585, - "grad_norm": 2.116373300552368, - "learning_rate": 1.8985189507122355e-05, - "loss": 0.5481, - "step": 210500 - }, - { - "epoch": 1.860977032832971, - "grad_norm": 2.9152045249938965, - "learning_rate": 1.8983716119450487e-05, - "loss": 0.4912, - "step": 210510 - }, - { - "epoch": 1.8610654360932832, - "grad_norm": 1.7530553340911865, - "learning_rate": 1.8982242731778615e-05, - "loss": 0.5536, - "step": 210520 - }, - { - "epoch": 1.8611538393535954, - "grad_norm": 2.242582321166992, - "learning_rate": 1.8980769344106743e-05, - "loss": 0.576, - "step": 210530 - }, - { - "epoch": 1.8612422426139075, - "grad_norm": 1.1391644477844238, - "learning_rate": 1.8979295956434875e-05, - "loss": 0.5832, - "step": 210540 - }, - { - "epoch": 1.8613306458742198, - "grad_norm": 2.775040864944458, - "learning_rate": 1.8977822568763003e-05, - "loss": 0.5738, - "step": 210550 - }, - { - "epoch": 1.8614190491345322, - "grad_norm": 6.068946361541748, - "learning_rate": 1.8976349181091132e-05, - "loss": 0.6366, - "step": 210560 - }, - { - "epoch": 1.8615074523948443, - "grad_norm": 0.9730151891708374, - "learning_rate": 1.897487579341926e-05, - "loss": 0.5877, - "step": 210570 - }, - { - "epoch": 1.8615958556551566, - "grad_norm": 8.074066162109375, - "learning_rate": 1.8973402405747392e-05, - "loss": 0.6844, - "step": 210580 - }, - { - "epoch": 1.861684258915469, - "grad_norm": 3.2558224201202393, - "learning_rate": 1.897192901807552e-05, - "loss": 0.5831, - "step": 210590 - }, - { - "epoch": 1.861772662175781, - "grad_norm": 3.300506114959717, - "learning_rate": 1.897045563040365e-05, - "loss": 0.6114, - "step": 210600 - }, - { - "epoch": 1.8618610654360932, - "grad_norm": 5.034607887268066, - "learning_rate": 1.8968982242731777e-05, - "loss": 0.52, - "step": 210610 - }, - { - "epoch": 1.8619494686964055, - "grad_norm": 1.6265817880630493, - "learning_rate": 1.896750885505991e-05, - "loss": 0.5916, - "step": 210620 - }, - { - "epoch": 1.8620378719567179, - "grad_norm": 3.206765651702881, - "learning_rate": 1.8966035467388037e-05, - "loss": 0.5087, - "step": 210630 - }, - { - "epoch": 1.86212627521703, - "grad_norm": 2.3355016708374023, - "learning_rate": 1.8964562079716165e-05, - "loss": 0.6369, - "step": 210640 - }, - { - "epoch": 1.8622146784773421, - "grad_norm": 1.6999640464782715, - "learning_rate": 1.8963088692044297e-05, - "loss": 0.6797, - "step": 210650 - }, - { - "epoch": 1.8623030817376545, - "grad_norm": 1.7024785280227661, - "learning_rate": 1.8961615304372425e-05, - "loss": 0.5105, - "step": 210660 - }, - { - "epoch": 1.8623914849979668, - "grad_norm": 0.689441978931427, - "learning_rate": 1.8960141916700554e-05, - "loss": 0.5794, - "step": 210670 - }, - { - "epoch": 1.862479888258279, - "grad_norm": 1.237181305885315, - "learning_rate": 1.8958668529028682e-05, - "loss": 0.4893, - "step": 210680 - }, - { - "epoch": 1.862568291518591, - "grad_norm": 2.740414619445801, - "learning_rate": 1.8957195141356814e-05, - "loss": 0.7547, - "step": 210690 - }, - { - "epoch": 1.8626566947789036, - "grad_norm": 2.399672031402588, - "learning_rate": 1.8955721753684942e-05, - "loss": 0.6757, - "step": 210700 - }, - { - "epoch": 1.8627450980392157, - "grad_norm": 10.054362297058105, - "learning_rate": 1.895424836601307e-05, - "loss": 0.6307, - "step": 210710 - }, - { - "epoch": 1.8628335012995278, - "grad_norm": 3.8515055179595947, - "learning_rate": 1.8952774978341202e-05, - "loss": 0.5937, - "step": 210720 - }, - { - "epoch": 1.8629219045598402, - "grad_norm": 3.577775716781616, - "learning_rate": 1.895130159066933e-05, - "loss": 0.5866, - "step": 210730 - }, - { - "epoch": 1.8630103078201525, - "grad_norm": 14.429736137390137, - "learning_rate": 1.894982820299746e-05, - "loss": 0.7103, - "step": 210740 - }, - { - "epoch": 1.8630987110804647, - "grad_norm": 2.594417095184326, - "learning_rate": 1.8948354815325587e-05, - "loss": 0.5822, - "step": 210750 - }, - { - "epoch": 1.8631871143407768, - "grad_norm": 1.7685571908950806, - "learning_rate": 1.894688142765372e-05, - "loss": 0.5286, - "step": 210760 - }, - { - "epoch": 1.8632755176010891, - "grad_norm": 2.778186559677124, - "learning_rate": 1.8945408039981848e-05, - "loss": 0.4858, - "step": 210770 - }, - { - "epoch": 1.8633639208614015, - "grad_norm": 2.275498628616333, - "learning_rate": 1.8943934652309976e-05, - "loss": 0.4612, - "step": 210780 - }, - { - "epoch": 1.8634523241217136, - "grad_norm": 5.5841779708862305, - "learning_rate": 1.8942461264638108e-05, - "loss": 0.6562, - "step": 210790 - }, - { - "epoch": 1.8635407273820257, - "grad_norm": 2.1666529178619385, - "learning_rate": 1.8940987876966236e-05, - "loss": 0.5021, - "step": 210800 - }, - { - "epoch": 1.863629130642338, - "grad_norm": 5.69851541519165, - "learning_rate": 1.8939514489294364e-05, - "loss": 0.5039, - "step": 210810 - }, - { - "epoch": 1.8637175339026504, - "grad_norm": 7.253466606140137, - "learning_rate": 1.8938041101622496e-05, - "loss": 0.6435, - "step": 210820 - }, - { - "epoch": 1.8638059371629625, - "grad_norm": 1.8968863487243652, - "learning_rate": 1.8936567713950624e-05, - "loss": 0.5547, - "step": 210830 - }, - { - "epoch": 1.8638943404232748, - "grad_norm": 3.991114616394043, - "learning_rate": 1.8935094326278753e-05, - "loss": 0.542, - "step": 210840 - }, - { - "epoch": 1.8639827436835872, - "grad_norm": 2.539324998855591, - "learning_rate": 1.8933620938606885e-05, - "loss": 0.6605, - "step": 210850 - }, - { - "epoch": 1.8640711469438993, - "grad_norm": 1.6203175783157349, - "learning_rate": 1.8932147550935013e-05, - "loss": 0.5179, - "step": 210860 - }, - { - "epoch": 1.8641595502042114, - "grad_norm": 5.415097713470459, - "learning_rate": 1.8930674163263145e-05, - "loss": 0.5718, - "step": 210870 - }, - { - "epoch": 1.8642479534645238, - "grad_norm": 3.519313335418701, - "learning_rate": 1.8929200775591273e-05, - "loss": 0.5557, - "step": 210880 - }, - { - "epoch": 1.864336356724836, - "grad_norm": 14.324116706848145, - "learning_rate": 1.89277273879194e-05, - "loss": 0.6179, - "step": 210890 - }, - { - "epoch": 1.8644247599851482, - "grad_norm": 2.7402307987213135, - "learning_rate": 1.8926254000247533e-05, - "loss": 0.62, - "step": 210900 - }, - { - "epoch": 1.8645131632454603, - "grad_norm": 1.7623101472854614, - "learning_rate": 1.892478061257566e-05, - "loss": 0.6093, - "step": 210910 - }, - { - "epoch": 1.8646015665057727, - "grad_norm": 1.7904170751571655, - "learning_rate": 1.892330722490379e-05, - "loss": 0.6941, - "step": 210920 - }, - { - "epoch": 1.864689969766085, - "grad_norm": 2.7217648029327393, - "learning_rate": 1.8921833837231918e-05, - "loss": 0.569, - "step": 210930 - }, - { - "epoch": 1.8647783730263972, - "grad_norm": 3.0506932735443115, - "learning_rate": 1.892036044956005e-05, - "loss": 0.6003, - "step": 210940 - }, - { - "epoch": 1.8648667762867095, - "grad_norm": 4.51693058013916, - "learning_rate": 1.8918887061888178e-05, - "loss": 0.7066, - "step": 210950 - }, - { - "epoch": 1.8649551795470218, - "grad_norm": 0.9649724960327148, - "learning_rate": 1.8917413674216307e-05, - "loss": 0.526, - "step": 210960 - }, - { - "epoch": 1.865043582807334, - "grad_norm": 5.553628444671631, - "learning_rate": 1.8915940286544435e-05, - "loss": 0.6509, - "step": 210970 - }, - { - "epoch": 1.865131986067646, - "grad_norm": 3.0489587783813477, - "learning_rate": 1.8914466898872567e-05, - "loss": 0.6754, - "step": 210980 - }, - { - "epoch": 1.8652203893279584, - "grad_norm": 10.719182014465332, - "learning_rate": 1.8912993511200695e-05, - "loss": 0.6287, - "step": 210990 - }, - { - "epoch": 1.8653087925882708, - "grad_norm": 5.158768653869629, - "learning_rate": 1.8911520123528823e-05, - "loss": 0.5916, - "step": 211000 - }, - { - "epoch": 1.8653971958485829, - "grad_norm": 5.014809608459473, - "learning_rate": 1.8910046735856955e-05, - "loss": 0.6263, - "step": 211010 - }, - { - "epoch": 1.865485599108895, - "grad_norm": 2.151365280151367, - "learning_rate": 1.8908573348185083e-05, - "loss": 0.4735, - "step": 211020 - }, - { - "epoch": 1.8655740023692073, - "grad_norm": 1.3339873552322388, - "learning_rate": 1.8907099960513212e-05, - "loss": 0.5552, - "step": 211030 - }, - { - "epoch": 1.8656624056295197, - "grad_norm": 2.4142069816589355, - "learning_rate": 1.890562657284134e-05, - "loss": 0.6132, - "step": 211040 - }, - { - "epoch": 1.8657508088898318, - "grad_norm": 5.738188743591309, - "learning_rate": 1.8904153185169472e-05, - "loss": 0.5236, - "step": 211050 - }, - { - "epoch": 1.8658392121501441, - "grad_norm": 0.7615451216697693, - "learning_rate": 1.89026797974976e-05, - "loss": 0.4567, - "step": 211060 - }, - { - "epoch": 1.8659276154104565, - "grad_norm": 3.8887619972229004, - "learning_rate": 1.890120640982573e-05, - "loss": 0.626, - "step": 211070 - }, - { - "epoch": 1.8660160186707686, - "grad_norm": 2.3460888862609863, - "learning_rate": 1.889973302215386e-05, - "loss": 0.636, - "step": 211080 - }, - { - "epoch": 1.8661044219310807, - "grad_norm": 9.921082496643066, - "learning_rate": 1.889825963448199e-05, - "loss": 0.6063, - "step": 211090 - }, - { - "epoch": 1.866192825191393, - "grad_norm": 3.5486340522766113, - "learning_rate": 1.8896786246810117e-05, - "loss": 0.6388, - "step": 211100 - }, - { - "epoch": 1.8662812284517054, - "grad_norm": 1.7861011028289795, - "learning_rate": 1.8895312859138245e-05, - "loss": 0.5378, - "step": 211110 - }, - { - "epoch": 1.8663696317120175, - "grad_norm": 4.1411237716674805, - "learning_rate": 1.8893839471466377e-05, - "loss": 0.5782, - "step": 211120 - }, - { - "epoch": 1.8664580349723296, - "grad_norm": 4.170247554779053, - "learning_rate": 1.8892366083794506e-05, - "loss": 0.5814, - "step": 211130 - }, - { - "epoch": 1.866546438232642, - "grad_norm": 4.27308988571167, - "learning_rate": 1.8890892696122634e-05, - "loss": 0.5502, - "step": 211140 - }, - { - "epoch": 1.8666348414929543, - "grad_norm": 2.343512773513794, - "learning_rate": 1.8889419308450762e-05, - "loss": 0.4894, - "step": 211150 - }, - { - "epoch": 1.8667232447532665, - "grad_norm": 3.4192721843719482, - "learning_rate": 1.8887945920778894e-05, - "loss": 0.6186, - "step": 211160 - }, - { - "epoch": 1.8668116480135788, - "grad_norm": 10.524596214294434, - "learning_rate": 1.8886472533107022e-05, - "loss": 0.6242, - "step": 211170 - }, - { - "epoch": 1.8669000512738911, - "grad_norm": 2.186183214187622, - "learning_rate": 1.888499914543515e-05, - "loss": 0.6875, - "step": 211180 - }, - { - "epoch": 1.8669884545342033, - "grad_norm": 3.4638583660125732, - "learning_rate": 1.8883525757763282e-05, - "loss": 0.4402, - "step": 211190 - }, - { - "epoch": 1.8670768577945154, - "grad_norm": 1.5348575115203857, - "learning_rate": 1.888205237009141e-05, - "loss": 0.4851, - "step": 211200 - }, - { - "epoch": 1.8671652610548277, - "grad_norm": 2.961879014968872, - "learning_rate": 1.888057898241954e-05, - "loss": 0.5269, - "step": 211210 - }, - { - "epoch": 1.86725366431514, - "grad_norm": 2.18208646774292, - "learning_rate": 1.8879105594747667e-05, - "loss": 0.5045, - "step": 211220 - }, - { - "epoch": 1.8673420675754522, - "grad_norm": 2.162020444869995, - "learning_rate": 1.88776322070758e-05, - "loss": 0.5443, - "step": 211230 - }, - { - "epoch": 1.8674304708357643, - "grad_norm": 16.945356369018555, - "learning_rate": 1.8876158819403928e-05, - "loss": 0.6668, - "step": 211240 - }, - { - "epoch": 1.8675188740960766, - "grad_norm": 1.7313823699951172, - "learning_rate": 1.8874685431732056e-05, - "loss": 0.6056, - "step": 211250 - }, - { - "epoch": 1.867607277356389, - "grad_norm": 1.8208528757095337, - "learning_rate": 1.8873212044060184e-05, - "loss": 0.6211, - "step": 211260 - }, - { - "epoch": 1.867695680616701, - "grad_norm": 1.1184802055358887, - "learning_rate": 1.8871738656388316e-05, - "loss": 0.5644, - "step": 211270 - }, - { - "epoch": 1.8677840838770132, - "grad_norm": 8.482893943786621, - "learning_rate": 1.8870265268716444e-05, - "loss": 0.4855, - "step": 211280 - }, - { - "epoch": 1.8678724871373258, - "grad_norm": 2.199266195297241, - "learning_rate": 1.8868791881044573e-05, - "loss": 0.4921, - "step": 211290 - }, - { - "epoch": 1.867960890397638, - "grad_norm": 1.4627994298934937, - "learning_rate": 1.8867318493372704e-05, - "loss": 0.5601, - "step": 211300 - }, - { - "epoch": 1.86804929365795, - "grad_norm": 4.6828413009643555, - "learning_rate": 1.8865845105700833e-05, - "loss": 0.5459, - "step": 211310 - }, - { - "epoch": 1.8681376969182624, - "grad_norm": 2.588606357574463, - "learning_rate": 1.886437171802896e-05, - "loss": 0.5119, - "step": 211320 - }, - { - "epoch": 1.8682261001785747, - "grad_norm": 3.95709228515625, - "learning_rate": 1.886289833035709e-05, - "loss": 0.5759, - "step": 211330 - }, - { - "epoch": 1.8683145034388868, - "grad_norm": 14.782010078430176, - "learning_rate": 1.886142494268522e-05, - "loss": 0.4809, - "step": 211340 - }, - { - "epoch": 1.868402906699199, - "grad_norm": 7.163753986358643, - "learning_rate": 1.885995155501335e-05, - "loss": 0.541, - "step": 211350 - }, - { - "epoch": 1.8684913099595113, - "grad_norm": 1.8535152673721313, - "learning_rate": 1.8858478167341478e-05, - "loss": 0.5425, - "step": 211360 - }, - { - "epoch": 1.8685797132198236, - "grad_norm": 2.3897852897644043, - "learning_rate": 1.885700477966961e-05, - "loss": 0.504, - "step": 211370 - }, - { - "epoch": 1.8686681164801358, - "grad_norm": 9.051289558410645, - "learning_rate": 1.8855531391997738e-05, - "loss": 0.6437, - "step": 211380 - }, - { - "epoch": 1.8687565197404479, - "grad_norm": 2.9049437046051025, - "learning_rate": 1.8854058004325866e-05, - "loss": 0.6196, - "step": 211390 - }, - { - "epoch": 1.8688449230007602, - "grad_norm": 9.958059310913086, - "learning_rate": 1.8852584616653995e-05, - "loss": 0.6195, - "step": 211400 - }, - { - "epoch": 1.8689333262610726, - "grad_norm": 5.780620574951172, - "learning_rate": 1.8851111228982127e-05, - "loss": 0.4466, - "step": 211410 - }, - { - "epoch": 1.8690217295213847, - "grad_norm": 1.4476335048675537, - "learning_rate": 1.8849637841310255e-05, - "loss": 0.624, - "step": 211420 - }, - { - "epoch": 1.869110132781697, - "grad_norm": 2.000783920288086, - "learning_rate": 1.8848164453638383e-05, - "loss": 0.4835, - "step": 211430 - }, - { - "epoch": 1.8691985360420094, - "grad_norm": 1.3840090036392212, - "learning_rate": 1.884669106596651e-05, - "loss": 0.5851, - "step": 211440 - }, - { - "epoch": 1.8692869393023215, - "grad_norm": 2.47864031791687, - "learning_rate": 1.8845217678294643e-05, - "loss": 0.6764, - "step": 211450 - }, - { - "epoch": 1.8693753425626336, - "grad_norm": 1.6236169338226318, - "learning_rate": 1.8843744290622772e-05, - "loss": 0.6003, - "step": 211460 - }, - { - "epoch": 1.869463745822946, - "grad_norm": 2.9928510189056396, - "learning_rate": 1.88422709029509e-05, - "loss": 0.5077, - "step": 211470 - }, - { - "epoch": 1.8695521490832583, - "grad_norm": 3.0410666465759277, - "learning_rate": 1.8840797515279032e-05, - "loss": 0.6633, - "step": 211480 - }, - { - "epoch": 1.8696405523435704, - "grad_norm": 1.3951866626739502, - "learning_rate": 1.883932412760716e-05, - "loss": 0.6126, - "step": 211490 - }, - { - "epoch": 1.8697289556038825, - "grad_norm": 1.5205440521240234, - "learning_rate": 1.883785073993529e-05, - "loss": 0.6052, - "step": 211500 - }, - { - "epoch": 1.8698173588641949, - "grad_norm": 2.1935489177703857, - "learning_rate": 1.8836377352263417e-05, - "loss": 0.6031, - "step": 211510 - }, - { - "epoch": 1.8699057621245072, - "grad_norm": 2.1376328468322754, - "learning_rate": 1.883490396459155e-05, - "loss": 0.5603, - "step": 211520 - }, - { - "epoch": 1.8699941653848193, - "grad_norm": 2.075496196746826, - "learning_rate": 1.8833430576919677e-05, - "loss": 0.5508, - "step": 211530 - }, - { - "epoch": 1.8700825686451317, - "grad_norm": 1.8145004510879517, - "learning_rate": 1.8831957189247805e-05, - "loss": 0.6853, - "step": 211540 - }, - { - "epoch": 1.870170971905444, - "grad_norm": 1.899678349494934, - "learning_rate": 1.8830483801575937e-05, - "loss": 0.6093, - "step": 211550 - }, - { - "epoch": 1.8702593751657561, - "grad_norm": 1.9398306608200073, - "learning_rate": 1.8829010413904065e-05, - "loss": 0.5764, - "step": 211560 - }, - { - "epoch": 1.8703477784260683, - "grad_norm": 0.985061526298523, - "learning_rate": 1.8827537026232194e-05, - "loss": 0.5547, - "step": 211570 - }, - { - "epoch": 1.8704361816863806, - "grad_norm": 2.241347551345825, - "learning_rate": 1.8826063638560322e-05, - "loss": 0.6419, - "step": 211580 - }, - { - "epoch": 1.870524584946693, - "grad_norm": 1.3498973846435547, - "learning_rate": 1.8824590250888454e-05, - "loss": 0.4481, - "step": 211590 - }, - { - "epoch": 1.870612988207005, - "grad_norm": 2.329902410507202, - "learning_rate": 1.8823116863216582e-05, - "loss": 0.548, - "step": 211600 - }, - { - "epoch": 1.8707013914673172, - "grad_norm": 3.2804064750671387, - "learning_rate": 1.882164347554471e-05, - "loss": 0.5119, - "step": 211610 - }, - { - "epoch": 1.8707897947276295, - "grad_norm": 15.255338668823242, - "learning_rate": 1.882017008787284e-05, - "loss": 0.6286, - "step": 211620 - }, - { - "epoch": 1.8708781979879419, - "grad_norm": 2.46301007270813, - "learning_rate": 1.881869670020097e-05, - "loss": 0.6802, - "step": 211630 - }, - { - "epoch": 1.870966601248254, - "grad_norm": 5.738197326660156, - "learning_rate": 1.88172233125291e-05, - "loss": 0.6169, - "step": 211640 - }, - { - "epoch": 1.8710550045085663, - "grad_norm": 3.685235023498535, - "learning_rate": 1.8815749924857227e-05, - "loss": 0.5951, - "step": 211650 - }, - { - "epoch": 1.8711434077688787, - "grad_norm": 12.558107376098633, - "learning_rate": 1.881427653718536e-05, - "loss": 0.6296, - "step": 211660 - }, - { - "epoch": 1.8712318110291908, - "grad_norm": 4.235104560852051, - "learning_rate": 1.8812803149513487e-05, - "loss": 0.55, - "step": 211670 - }, - { - "epoch": 1.871320214289503, - "grad_norm": 4.005383014678955, - "learning_rate": 1.8811329761841616e-05, - "loss": 0.5216, - "step": 211680 - }, - { - "epoch": 1.8714086175498152, - "grad_norm": 17.446208953857422, - "learning_rate": 1.8809856374169748e-05, - "loss": 0.7013, - "step": 211690 - }, - { - "epoch": 1.8714970208101276, - "grad_norm": 3.2882604598999023, - "learning_rate": 1.8808382986497876e-05, - "loss": 0.6057, - "step": 211700 - }, - { - "epoch": 1.8715854240704397, - "grad_norm": 1.3633511066436768, - "learning_rate": 1.8806909598826004e-05, - "loss": 0.4949, - "step": 211710 - }, - { - "epoch": 1.8716738273307518, - "grad_norm": 2.982347249984741, - "learning_rate": 1.8805436211154136e-05, - "loss": 0.5158, - "step": 211720 - }, - { - "epoch": 1.8717622305910642, - "grad_norm": 2.555624485015869, - "learning_rate": 1.8803962823482264e-05, - "loss": 0.5961, - "step": 211730 - }, - { - "epoch": 1.8718506338513765, - "grad_norm": 12.946317672729492, - "learning_rate": 1.8802489435810393e-05, - "loss": 0.6125, - "step": 211740 - }, - { - "epoch": 1.8719390371116886, - "grad_norm": 2.439051628112793, - "learning_rate": 1.8801016048138524e-05, - "loss": 0.6064, - "step": 211750 - }, - { - "epoch": 1.872027440372001, - "grad_norm": 2.308988571166992, - "learning_rate": 1.8799542660466653e-05, - "loss": 0.5771, - "step": 211760 - }, - { - "epoch": 1.8721158436323133, - "grad_norm": 19.356401443481445, - "learning_rate": 1.879806927279478e-05, - "loss": 0.6851, - "step": 211770 - }, - { - "epoch": 1.8722042468926254, - "grad_norm": 2.0891366004943848, - "learning_rate": 1.8796595885122913e-05, - "loss": 0.7139, - "step": 211780 - }, - { - "epoch": 1.8722926501529376, - "grad_norm": 3.92799973487854, - "learning_rate": 1.879512249745104e-05, - "loss": 0.5393, - "step": 211790 - }, - { - "epoch": 1.87238105341325, - "grad_norm": 2.865943193435669, - "learning_rate": 1.879364910977917e-05, - "loss": 0.5939, - "step": 211800 - }, - { - "epoch": 1.8724694566735622, - "grad_norm": 11.907809257507324, - "learning_rate": 1.87921757221073e-05, - "loss": 0.5798, - "step": 211810 - }, - { - "epoch": 1.8725578599338744, - "grad_norm": 1.8399838209152222, - "learning_rate": 1.879070233443543e-05, - "loss": 0.5951, - "step": 211820 - }, - { - "epoch": 1.8726462631941865, - "grad_norm": 11.052319526672363, - "learning_rate": 1.8789228946763558e-05, - "loss": 0.6618, - "step": 211830 - }, - { - "epoch": 1.8727346664544988, - "grad_norm": 1.8781675100326538, - "learning_rate": 1.878775555909169e-05, - "loss": 0.5829, - "step": 211840 - }, - { - "epoch": 1.8728230697148112, - "grad_norm": 3.443168878555298, - "learning_rate": 1.8786282171419818e-05, - "loss": 0.6249, - "step": 211850 - }, - { - "epoch": 1.8729114729751233, - "grad_norm": 1.7187124490737915, - "learning_rate": 1.8784808783747947e-05, - "loss": 0.5567, - "step": 211860 - }, - { - "epoch": 1.8729998762354354, - "grad_norm": 1.6366448402404785, - "learning_rate": 1.8783335396076075e-05, - "loss": 0.5983, - "step": 211870 - }, - { - "epoch": 1.873088279495748, - "grad_norm": 3.721496343612671, - "learning_rate": 1.8781862008404207e-05, - "loss": 0.6211, - "step": 211880 - }, - { - "epoch": 1.87317668275606, - "grad_norm": 2.662074089050293, - "learning_rate": 1.8780388620732335e-05, - "loss": 0.5681, - "step": 211890 - }, - { - "epoch": 1.8732650860163722, - "grad_norm": 3.5479068756103516, - "learning_rate": 1.8778915233060463e-05, - "loss": 0.6416, - "step": 211900 - }, - { - "epoch": 1.8733534892766845, - "grad_norm": 1.3248940706253052, - "learning_rate": 1.877744184538859e-05, - "loss": 0.6511, - "step": 211910 - }, - { - "epoch": 1.8734418925369969, - "grad_norm": 1.1158252954483032, - "learning_rate": 1.8775968457716723e-05, - "loss": 0.6935, - "step": 211920 - }, - { - "epoch": 1.873530295797309, - "grad_norm": 1.1364325284957886, - "learning_rate": 1.8774495070044852e-05, - "loss": 0.6279, - "step": 211930 - }, - { - "epoch": 1.8736186990576211, - "grad_norm": 5.12778902053833, - "learning_rate": 1.877302168237298e-05, - "loss": 0.5977, - "step": 211940 - }, - { - "epoch": 1.8737071023179335, - "grad_norm": 8.264141082763672, - "learning_rate": 1.8771548294701112e-05, - "loss": 0.6718, - "step": 211950 - }, - { - "epoch": 1.8737955055782458, - "grad_norm": 7.577733993530273, - "learning_rate": 1.877007490702924e-05, - "loss": 0.4922, - "step": 211960 - }, - { - "epoch": 1.873883908838558, - "grad_norm": 2.200882911682129, - "learning_rate": 1.876860151935737e-05, - "loss": 0.5648, - "step": 211970 - }, - { - "epoch": 1.87397231209887, - "grad_norm": 2.6261353492736816, - "learning_rate": 1.8767128131685497e-05, - "loss": 0.6939, - "step": 211980 - }, - { - "epoch": 1.8740607153591824, - "grad_norm": 7.190047264099121, - "learning_rate": 1.876565474401363e-05, - "loss": 0.6494, - "step": 211990 - }, - { - "epoch": 1.8741491186194947, - "grad_norm": 2.867805242538452, - "learning_rate": 1.8764181356341757e-05, - "loss": 0.7128, - "step": 212000 - }, - { - "epoch": 1.8742375218798069, - "grad_norm": 1.2611019611358643, - "learning_rate": 1.8762707968669885e-05, - "loss": 0.4783, - "step": 212010 - }, - { - "epoch": 1.8743259251401192, - "grad_norm": 8.136725425720215, - "learning_rate": 1.8761234580998017e-05, - "loss": 0.5401, - "step": 212020 - }, - { - "epoch": 1.8744143284004315, - "grad_norm": 2.3983137607574463, - "learning_rate": 1.8759761193326145e-05, - "loss": 0.5398, - "step": 212030 - }, - { - "epoch": 1.8745027316607437, - "grad_norm": 2.939603090286255, - "learning_rate": 1.8758287805654274e-05, - "loss": 0.5078, - "step": 212040 - }, - { - "epoch": 1.8745911349210558, - "grad_norm": 2.2624449729919434, - "learning_rate": 1.8756814417982402e-05, - "loss": 0.6563, - "step": 212050 - }, - { - "epoch": 1.8746795381813681, - "grad_norm": 11.68359088897705, - "learning_rate": 1.8755341030310534e-05, - "loss": 0.627, - "step": 212060 - }, - { - "epoch": 1.8747679414416805, - "grad_norm": 1.6339939832687378, - "learning_rate": 1.8753867642638662e-05, - "loss": 0.5462, - "step": 212070 - }, - { - "epoch": 1.8748563447019926, - "grad_norm": 6.112826824188232, - "learning_rate": 1.875239425496679e-05, - "loss": 0.6341, - "step": 212080 - }, - { - "epoch": 1.8749447479623047, - "grad_norm": 1.7221001386642456, - "learning_rate": 1.875092086729492e-05, - "loss": 0.6001, - "step": 212090 - }, - { - "epoch": 1.875033151222617, - "grad_norm": 1.1448161602020264, - "learning_rate": 1.874944747962305e-05, - "loss": 0.5859, - "step": 212100 - }, - { - "epoch": 1.8751215544829294, - "grad_norm": 3.3444106578826904, - "learning_rate": 1.874797409195118e-05, - "loss": 0.5248, - "step": 212110 - }, - { - "epoch": 1.8752099577432415, - "grad_norm": 4.8078508377075195, - "learning_rate": 1.8746500704279307e-05, - "loss": 0.5533, - "step": 212120 - }, - { - "epoch": 1.8752983610035538, - "grad_norm": 1.4071780443191528, - "learning_rate": 1.874502731660744e-05, - "loss": 0.6851, - "step": 212130 - }, - { - "epoch": 1.8753867642638662, - "grad_norm": 5.860563278198242, - "learning_rate": 1.8743553928935568e-05, - "loss": 0.5989, - "step": 212140 - }, - { - "epoch": 1.8754751675241783, - "grad_norm": 5.743417263031006, - "learning_rate": 1.8742080541263696e-05, - "loss": 0.5763, - "step": 212150 - }, - { - "epoch": 1.8755635707844904, - "grad_norm": 1.6394555568695068, - "learning_rate": 1.8740607153591824e-05, - "loss": 0.5499, - "step": 212160 - }, - { - "epoch": 1.8756519740448028, - "grad_norm": 1.9762955904006958, - "learning_rate": 1.8739133765919956e-05, - "loss": 0.565, - "step": 212170 - }, - { - "epoch": 1.8757403773051151, - "grad_norm": 2.414860725402832, - "learning_rate": 1.8737660378248084e-05, - "loss": 0.5682, - "step": 212180 - }, - { - "epoch": 1.8758287805654272, - "grad_norm": 2.592427968978882, - "learning_rate": 1.8736186990576213e-05, - "loss": 0.4106, - "step": 212190 - }, - { - "epoch": 1.8759171838257394, - "grad_norm": 8.744173049926758, - "learning_rate": 1.873471360290434e-05, - "loss": 0.8138, - "step": 212200 - }, - { - "epoch": 1.8760055870860517, - "grad_norm": 2.453145980834961, - "learning_rate": 1.8733240215232473e-05, - "loss": 0.5933, - "step": 212210 - }, - { - "epoch": 1.876093990346364, - "grad_norm": 1.2898881435394287, - "learning_rate": 1.87317668275606e-05, - "loss": 0.5223, - "step": 212220 - }, - { - "epoch": 1.8761823936066762, - "grad_norm": 2.6530442237854004, - "learning_rate": 1.873029343988873e-05, - "loss": 0.5596, - "step": 212230 - }, - { - "epoch": 1.8762707968669885, - "grad_norm": 1.509381890296936, - "learning_rate": 1.872882005221686e-05, - "loss": 0.4527, - "step": 212240 - }, - { - "epoch": 1.8763592001273008, - "grad_norm": 6.895394802093506, - "learning_rate": 1.872734666454499e-05, - "loss": 0.4692, - "step": 212250 - }, - { - "epoch": 1.876447603387613, - "grad_norm": 9.649444580078125, - "learning_rate": 1.8725873276873118e-05, - "loss": 0.5743, - "step": 212260 - }, - { - "epoch": 1.876536006647925, - "grad_norm": 2.2451863288879395, - "learning_rate": 1.8724399889201246e-05, - "loss": 0.6761, - "step": 212270 - }, - { - "epoch": 1.8766244099082374, - "grad_norm": 3.3644282817840576, - "learning_rate": 1.8722926501529378e-05, - "loss": 0.6537, - "step": 212280 - }, - { - "epoch": 1.8767128131685498, - "grad_norm": 3.8315041065216064, - "learning_rate": 1.8721453113857506e-05, - "loss": 0.553, - "step": 212290 - }, - { - "epoch": 1.8768012164288619, - "grad_norm": 7.9946794509887695, - "learning_rate": 1.8719979726185635e-05, - "loss": 0.7313, - "step": 212300 - }, - { - "epoch": 1.876889619689174, - "grad_norm": 1.6254922151565552, - "learning_rate": 1.8718506338513766e-05, - "loss": 0.5888, - "step": 212310 - }, - { - "epoch": 1.8769780229494863, - "grad_norm": 4.383457660675049, - "learning_rate": 1.8717032950841895e-05, - "loss": 0.5613, - "step": 212320 - }, - { - "epoch": 1.8770664262097987, - "grad_norm": 1.949416160583496, - "learning_rate": 1.8715559563170023e-05, - "loss": 0.5842, - "step": 212330 - }, - { - "epoch": 1.8771548294701108, - "grad_norm": 1.9556927680969238, - "learning_rate": 1.871408617549815e-05, - "loss": 0.5902, - "step": 212340 - }, - { - "epoch": 1.8772432327304232, - "grad_norm": 4.232676029205322, - "learning_rate": 1.8712612787826283e-05, - "loss": 0.6059, - "step": 212350 - }, - { - "epoch": 1.8773316359907355, - "grad_norm": 1.3433644771575928, - "learning_rate": 1.871113940015441e-05, - "loss": 0.7392, - "step": 212360 - }, - { - "epoch": 1.8774200392510476, - "grad_norm": 1.6884804964065552, - "learning_rate": 1.870966601248254e-05, - "loss": 0.5262, - "step": 212370 - }, - { - "epoch": 1.8775084425113597, - "grad_norm": 8.2138032913208, - "learning_rate": 1.870819262481067e-05, - "loss": 0.661, - "step": 212380 - }, - { - "epoch": 1.877596845771672, - "grad_norm": 1.5879830121994019, - "learning_rate": 1.87067192371388e-05, - "loss": 0.5244, - "step": 212390 - }, - { - "epoch": 1.8776852490319844, - "grad_norm": 2.5254714488983154, - "learning_rate": 1.870524584946693e-05, - "loss": 0.5861, - "step": 212400 - }, - { - "epoch": 1.8777736522922965, - "grad_norm": 1.065232753753662, - "learning_rate": 1.8703772461795057e-05, - "loss": 0.5534, - "step": 212410 - }, - { - "epoch": 1.8778620555526087, - "grad_norm": 2.345660924911499, - "learning_rate": 1.870229907412319e-05, - "loss": 0.573, - "step": 212420 - }, - { - "epoch": 1.877950458812921, - "grad_norm": 1.898077130317688, - "learning_rate": 1.8700825686451317e-05, - "loss": 0.4771, - "step": 212430 - }, - { - "epoch": 1.8780388620732333, - "grad_norm": 2.2095444202423096, - "learning_rate": 1.8699352298779445e-05, - "loss": 0.6287, - "step": 212440 - }, - { - "epoch": 1.8781272653335455, - "grad_norm": 1.463356852531433, - "learning_rate": 1.8697878911107574e-05, - "loss": 0.5482, - "step": 212450 - }, - { - "epoch": 1.8782156685938576, - "grad_norm": 3.3391504287719727, - "learning_rate": 1.8696405523435705e-05, - "loss": 0.5935, - "step": 212460 - }, - { - "epoch": 1.8783040718541701, - "grad_norm": 2.8792531490325928, - "learning_rate": 1.8694932135763834e-05, - "loss": 0.5928, - "step": 212470 - }, - { - "epoch": 1.8783924751144823, - "grad_norm": 2.7586793899536133, - "learning_rate": 1.8693458748091962e-05, - "loss": 0.5689, - "step": 212480 - }, - { - "epoch": 1.8784808783747944, - "grad_norm": 5.905708312988281, - "learning_rate": 1.8691985360420094e-05, - "loss": 0.5409, - "step": 212490 - }, - { - "epoch": 1.8785692816351067, - "grad_norm": 14.364883422851562, - "learning_rate": 1.8690511972748222e-05, - "loss": 0.6887, - "step": 212500 - }, - { - "epoch": 1.878657684895419, - "grad_norm": 2.8432703018188477, - "learning_rate": 1.868903858507635e-05, - "loss": 0.5137, - "step": 212510 - }, - { - "epoch": 1.8787460881557312, - "grad_norm": 1.5833494663238525, - "learning_rate": 1.868756519740448e-05, - "loss": 0.4548, - "step": 212520 - }, - { - "epoch": 1.8788344914160433, - "grad_norm": 1.6333998441696167, - "learning_rate": 1.868609180973261e-05, - "loss": 0.5316, - "step": 212530 - }, - { - "epoch": 1.8789228946763556, - "grad_norm": 1.2314115762710571, - "learning_rate": 1.868461842206074e-05, - "loss": 0.4912, - "step": 212540 - }, - { - "epoch": 1.879011297936668, - "grad_norm": 4.743088245391846, - "learning_rate": 1.8683145034388867e-05, - "loss": 0.5233, - "step": 212550 - }, - { - "epoch": 1.87909970119698, - "grad_norm": 2.5482184886932373, - "learning_rate": 1.8681671646716996e-05, - "loss": 0.6037, - "step": 212560 - }, - { - "epoch": 1.8791881044572922, - "grad_norm": 3.077543258666992, - "learning_rate": 1.8680198259045127e-05, - "loss": 0.6617, - "step": 212570 - }, - { - "epoch": 1.8792765077176046, - "grad_norm": 2.091076135635376, - "learning_rate": 1.8678724871373256e-05, - "loss": 0.515, - "step": 212580 - }, - { - "epoch": 1.879364910977917, - "grad_norm": 1.3151177167892456, - "learning_rate": 1.8677251483701384e-05, - "loss": 0.5848, - "step": 212590 - }, - { - "epoch": 1.879453314238229, - "grad_norm": 1.5070290565490723, - "learning_rate": 1.8675778096029516e-05, - "loss": 0.5968, - "step": 212600 - }, - { - "epoch": 1.8795417174985414, - "grad_norm": 4.931157112121582, - "learning_rate": 1.8674304708357644e-05, - "loss": 0.5331, - "step": 212610 - }, - { - "epoch": 1.8796301207588537, - "grad_norm": 2.310117244720459, - "learning_rate": 1.8672831320685773e-05, - "loss": 0.4836, - "step": 212620 - }, - { - "epoch": 1.8797185240191658, - "grad_norm": 1.8003199100494385, - "learning_rate": 1.8671357933013904e-05, - "loss": 0.5507, - "step": 212630 - }, - { - "epoch": 1.879806927279478, - "grad_norm": 1.0642681121826172, - "learning_rate": 1.8669884545342033e-05, - "loss": 0.5688, - "step": 212640 - }, - { - "epoch": 1.8798953305397903, - "grad_norm": 3.1406989097595215, - "learning_rate": 1.866841115767016e-05, - "loss": 0.5875, - "step": 212650 - }, - { - "epoch": 1.8799837338001026, - "grad_norm": 1.50808584690094, - "learning_rate": 1.8666937769998293e-05, - "loss": 0.5141, - "step": 212660 - }, - { - "epoch": 1.8800721370604148, - "grad_norm": 3.3878180980682373, - "learning_rate": 1.866546438232642e-05, - "loss": 0.5458, - "step": 212670 - }, - { - "epoch": 1.8801605403207269, - "grad_norm": 1.265716314315796, - "learning_rate": 1.866399099465455e-05, - "loss": 0.5068, - "step": 212680 - }, - { - "epoch": 1.8802489435810392, - "grad_norm": 3.274003028869629, - "learning_rate": 1.866251760698268e-05, - "loss": 0.5244, - "step": 212690 - }, - { - "epoch": 1.8803373468413516, - "grad_norm": 1.5882059335708618, - "learning_rate": 1.866104421931081e-05, - "loss": 0.5657, - "step": 212700 - }, - { - "epoch": 1.8804257501016637, - "grad_norm": 3.2886435985565186, - "learning_rate": 1.8659570831638938e-05, - "loss": 0.6797, - "step": 212710 - }, - { - "epoch": 1.880514153361976, - "grad_norm": 2.704702615737915, - "learning_rate": 1.865809744396707e-05, - "loss": 0.5966, - "step": 212720 - }, - { - "epoch": 1.8806025566222884, - "grad_norm": 2.2841033935546875, - "learning_rate": 1.8656624056295198e-05, - "loss": 0.5501, - "step": 212730 - }, - { - "epoch": 1.8806909598826005, - "grad_norm": 18.79593276977539, - "learning_rate": 1.8655150668623326e-05, - "loss": 0.601, - "step": 212740 - }, - { - "epoch": 1.8807793631429126, - "grad_norm": 2.4465584754943848, - "learning_rate": 1.8653677280951458e-05, - "loss": 0.5834, - "step": 212750 - }, - { - "epoch": 1.880867766403225, - "grad_norm": 1.4901008605957031, - "learning_rate": 1.8652203893279586e-05, - "loss": 0.5264, - "step": 212760 - }, - { - "epoch": 1.8809561696635373, - "grad_norm": 3.8159334659576416, - "learning_rate": 1.8650730505607715e-05, - "loss": 0.5935, - "step": 212770 - }, - { - "epoch": 1.8810445729238494, - "grad_norm": 2.6459484100341797, - "learning_rate": 1.8649257117935847e-05, - "loss": 0.5992, - "step": 212780 - }, - { - "epoch": 1.8811329761841615, - "grad_norm": 2.558025598526001, - "learning_rate": 1.8647783730263975e-05, - "loss": 0.5407, - "step": 212790 - }, - { - "epoch": 1.8812213794444739, - "grad_norm": 2.620041608810425, - "learning_rate": 1.8646310342592103e-05, - "loss": 0.493, - "step": 212800 - }, - { - "epoch": 1.8813097827047862, - "grad_norm": 1.0128344297409058, - "learning_rate": 1.864483695492023e-05, - "loss": 0.6075, - "step": 212810 - }, - { - "epoch": 1.8813981859650983, - "grad_norm": 2.3276541233062744, - "learning_rate": 1.8643363567248363e-05, - "loss": 0.7396, - "step": 212820 - }, - { - "epoch": 1.8814865892254107, - "grad_norm": 6.157994270324707, - "learning_rate": 1.8641890179576492e-05, - "loss": 0.5341, - "step": 212830 - }, - { - "epoch": 1.881574992485723, - "grad_norm": 3.8587722778320312, - "learning_rate": 1.864041679190462e-05, - "loss": 0.6586, - "step": 212840 - }, - { - "epoch": 1.8816633957460351, - "grad_norm": 2.199636936187744, - "learning_rate": 1.863894340423275e-05, - "loss": 0.5565, - "step": 212850 - }, - { - "epoch": 1.8817517990063473, - "grad_norm": 3.426286458969116, - "learning_rate": 1.863747001656088e-05, - "loss": 0.4645, - "step": 212860 - }, - { - "epoch": 1.8818402022666596, - "grad_norm": 0.8287149667739868, - "learning_rate": 1.863599662888901e-05, - "loss": 0.5721, - "step": 212870 - }, - { - "epoch": 1.881928605526972, - "grad_norm": 5.03294563293457, - "learning_rate": 1.8634523241217137e-05, - "loss": 0.4732, - "step": 212880 - }, - { - "epoch": 1.882017008787284, - "grad_norm": 1.1851072311401367, - "learning_rate": 1.863304985354527e-05, - "loss": 0.678, - "step": 212890 - }, - { - "epoch": 1.8821054120475962, - "grad_norm": 6.871645927429199, - "learning_rate": 1.8631576465873397e-05, - "loss": 0.7234, - "step": 212900 - }, - { - "epoch": 1.8821938153079085, - "grad_norm": 5.614889144897461, - "learning_rate": 1.8630103078201525e-05, - "loss": 0.5841, - "step": 212910 - }, - { - "epoch": 1.8822822185682209, - "grad_norm": 3.654690980911255, - "learning_rate": 1.8628629690529654e-05, - "loss": 0.4902, - "step": 212920 - }, - { - "epoch": 1.882370621828533, - "grad_norm": 9.863300323486328, - "learning_rate": 1.8627156302857785e-05, - "loss": 0.6219, - "step": 212930 - }, - { - "epoch": 1.8824590250888453, - "grad_norm": 1.1956835985183716, - "learning_rate": 1.8625682915185914e-05, - "loss": 0.5397, - "step": 212940 - }, - { - "epoch": 1.8825474283491577, - "grad_norm": 1.8851938247680664, - "learning_rate": 1.8624209527514042e-05, - "loss": 0.6684, - "step": 212950 - }, - { - "epoch": 1.8826358316094698, - "grad_norm": 2.18277645111084, - "learning_rate": 1.8622736139842174e-05, - "loss": 0.6259, - "step": 212960 - }, - { - "epoch": 1.882724234869782, - "grad_norm": 3.0681982040405273, - "learning_rate": 1.8621262752170302e-05, - "loss": 0.5543, - "step": 212970 - }, - { - "epoch": 1.8828126381300943, - "grad_norm": 2.2887508869171143, - "learning_rate": 1.861978936449843e-05, - "loss": 0.5822, - "step": 212980 - }, - { - "epoch": 1.8829010413904066, - "grad_norm": 11.29295539855957, - "learning_rate": 1.861831597682656e-05, - "loss": 0.5882, - "step": 212990 - }, - { - "epoch": 1.8829894446507187, - "grad_norm": 1.1387871503829956, - "learning_rate": 1.861684258915469e-05, - "loss": 0.7087, - "step": 213000 - }, - { - "epoch": 1.8830778479110308, - "grad_norm": 2.4769179821014404, - "learning_rate": 1.861536920148282e-05, - "loss": 0.6643, - "step": 213010 - }, - { - "epoch": 1.8831662511713432, - "grad_norm": 2.312826156616211, - "learning_rate": 1.8613895813810947e-05, - "loss": 0.5726, - "step": 213020 - }, - { - "epoch": 1.8832546544316555, - "grad_norm": 4.7945780754089355, - "learning_rate": 1.8612422426139076e-05, - "loss": 0.536, - "step": 213030 - }, - { - "epoch": 1.8833430576919676, - "grad_norm": 2.921363592147827, - "learning_rate": 1.8610949038467207e-05, - "loss": 0.6759, - "step": 213040 - }, - { - "epoch": 1.8834314609522798, - "grad_norm": 1.5335419178009033, - "learning_rate": 1.8609475650795336e-05, - "loss": 0.5513, - "step": 213050 - }, - { - "epoch": 1.8835198642125923, - "grad_norm": 2.939746141433716, - "learning_rate": 1.8608002263123464e-05, - "loss": 0.6185, - "step": 213060 - }, - { - "epoch": 1.8836082674729044, - "grad_norm": 10.422958374023438, - "learning_rate": 1.8606528875451596e-05, - "loss": 0.5928, - "step": 213070 - }, - { - "epoch": 1.8836966707332166, - "grad_norm": 1.5900689363479614, - "learning_rate": 1.8605055487779724e-05, - "loss": 0.5744, - "step": 213080 - }, - { - "epoch": 1.883785073993529, - "grad_norm": 1.7086278200149536, - "learning_rate": 1.8603582100107853e-05, - "loss": 0.6819, - "step": 213090 - }, - { - "epoch": 1.8838734772538412, - "grad_norm": 6.898779392242432, - "learning_rate": 1.860210871243598e-05, - "loss": 0.5775, - "step": 213100 - }, - { - "epoch": 1.8839618805141534, - "grad_norm": 1.625374436378479, - "learning_rate": 1.8600635324764113e-05, - "loss": 0.549, - "step": 213110 - }, - { - "epoch": 1.8840502837744655, - "grad_norm": 6.336486339569092, - "learning_rate": 1.859916193709224e-05, - "loss": 0.6147, - "step": 213120 - }, - { - "epoch": 1.8841386870347778, - "grad_norm": 2.499382972717285, - "learning_rate": 1.859768854942037e-05, - "loss": 0.6723, - "step": 213130 - }, - { - "epoch": 1.8842270902950902, - "grad_norm": 1.2628241777420044, - "learning_rate": 1.85962151617485e-05, - "loss": 0.6456, - "step": 213140 - }, - { - "epoch": 1.8843154935554023, - "grad_norm": 1.5055720806121826, - "learning_rate": 1.859474177407663e-05, - "loss": 0.595, - "step": 213150 - }, - { - "epoch": 1.8844038968157144, - "grad_norm": 3.127073049545288, - "learning_rate": 1.8593268386404758e-05, - "loss": 0.558, - "step": 213160 - }, - { - "epoch": 1.8844923000760267, - "grad_norm": 4.078506946563721, - "learning_rate": 1.8591794998732886e-05, - "loss": 0.6386, - "step": 213170 - }, - { - "epoch": 1.884580703336339, - "grad_norm": 1.4510366916656494, - "learning_rate": 1.8590321611061018e-05, - "loss": 0.5584, - "step": 213180 - }, - { - "epoch": 1.8846691065966512, - "grad_norm": 2.42274808883667, - "learning_rate": 1.8588848223389146e-05, - "loss": 0.5751, - "step": 213190 - }, - { - "epoch": 1.8847575098569636, - "grad_norm": 3.6079776287078857, - "learning_rate": 1.8587374835717275e-05, - "loss": 0.6377, - "step": 213200 - }, - { - "epoch": 1.884845913117276, - "grad_norm": 1.661149263381958, - "learning_rate": 1.8585901448045403e-05, - "loss": 0.5397, - "step": 213210 - }, - { - "epoch": 1.884934316377588, - "grad_norm": 0.6031556129455566, - "learning_rate": 1.8584428060373535e-05, - "loss": 0.547, - "step": 213220 - }, - { - "epoch": 1.8850227196379001, - "grad_norm": 15.123361587524414, - "learning_rate": 1.8582954672701663e-05, - "loss": 0.6655, - "step": 213230 - }, - { - "epoch": 1.8851111228982125, - "grad_norm": 4.451207637786865, - "learning_rate": 1.858148128502979e-05, - "loss": 0.687, - "step": 213240 - }, - { - "epoch": 1.8851995261585248, - "grad_norm": 3.892076015472412, - "learning_rate": 1.8580007897357923e-05, - "loss": 0.4964, - "step": 213250 - }, - { - "epoch": 1.885287929418837, - "grad_norm": 1.2234504222869873, - "learning_rate": 1.857853450968605e-05, - "loss": 0.6292, - "step": 213260 - }, - { - "epoch": 1.885376332679149, - "grad_norm": 1.309215784072876, - "learning_rate": 1.857706112201418e-05, - "loss": 0.5311, - "step": 213270 - }, - { - "epoch": 1.8854647359394614, - "grad_norm": 5.029854774475098, - "learning_rate": 1.8575587734342308e-05, - "loss": 0.5742, - "step": 213280 - }, - { - "epoch": 1.8855531391997737, - "grad_norm": 4.400570392608643, - "learning_rate": 1.857411434667044e-05, - "loss": 0.6355, - "step": 213290 - }, - { - "epoch": 1.8856415424600859, - "grad_norm": 4.826441287994385, - "learning_rate": 1.857264095899857e-05, - "loss": 0.6101, - "step": 213300 - }, - { - "epoch": 1.8857299457203982, - "grad_norm": 1.3306336402893066, - "learning_rate": 1.8571167571326697e-05, - "loss": 0.5845, - "step": 213310 - }, - { - "epoch": 1.8858183489807105, - "grad_norm": 5.465448379516602, - "learning_rate": 1.8569694183654825e-05, - "loss": 0.5639, - "step": 213320 - }, - { - "epoch": 1.8859067522410227, - "grad_norm": 1.3071844577789307, - "learning_rate": 1.8568220795982957e-05, - "loss": 0.5018, - "step": 213330 - }, - { - "epoch": 1.8859951555013348, - "grad_norm": 7.508504390716553, - "learning_rate": 1.8566747408311085e-05, - "loss": 0.5757, - "step": 213340 - }, - { - "epoch": 1.8860835587616471, - "grad_norm": 0.7872974276542664, - "learning_rate": 1.8565274020639214e-05, - "loss": 0.5064, - "step": 213350 - }, - { - "epoch": 1.8861719620219595, - "grad_norm": 1.6077816486358643, - "learning_rate": 1.8563800632967345e-05, - "loss": 0.5257, - "step": 213360 - }, - { - "epoch": 1.8862603652822716, - "grad_norm": 6.302602767944336, - "learning_rate": 1.8562327245295474e-05, - "loss": 0.6518, - "step": 213370 - }, - { - "epoch": 1.8863487685425837, - "grad_norm": 1.237106442451477, - "learning_rate": 1.8560853857623602e-05, - "loss": 0.638, - "step": 213380 - }, - { - "epoch": 1.886437171802896, - "grad_norm": 5.677822113037109, - "learning_rate": 1.855938046995173e-05, - "loss": 0.5992, - "step": 213390 - }, - { - "epoch": 1.8865255750632084, - "grad_norm": 7.012515068054199, - "learning_rate": 1.8557907082279862e-05, - "loss": 0.493, - "step": 213400 - }, - { - "epoch": 1.8866139783235205, - "grad_norm": 9.422869682312012, - "learning_rate": 1.855643369460799e-05, - "loss": 0.618, - "step": 213410 - }, - { - "epoch": 1.8867023815838329, - "grad_norm": 3.234034299850464, - "learning_rate": 1.855496030693612e-05, - "loss": 0.5519, - "step": 213420 - }, - { - "epoch": 1.8867907848441452, - "grad_norm": 1.369439959526062, - "learning_rate": 1.855348691926425e-05, - "loss": 0.6308, - "step": 213430 - }, - { - "epoch": 1.8868791881044573, - "grad_norm": 5.693788051605225, - "learning_rate": 1.855201353159238e-05, - "loss": 0.6196, - "step": 213440 - }, - { - "epoch": 1.8869675913647694, - "grad_norm": 1.55559241771698, - "learning_rate": 1.8550540143920507e-05, - "loss": 0.5714, - "step": 213450 - }, - { - "epoch": 1.8870559946250818, - "grad_norm": 6.621094226837158, - "learning_rate": 1.8549066756248636e-05, - "loss": 0.623, - "step": 213460 - }, - { - "epoch": 1.8871443978853941, - "grad_norm": 5.765220642089844, - "learning_rate": 1.8547593368576767e-05, - "loss": 0.5496, - "step": 213470 - }, - { - "epoch": 1.8872328011457062, - "grad_norm": 2.526282548904419, - "learning_rate": 1.8546119980904896e-05, - "loss": 0.6379, - "step": 213480 - }, - { - "epoch": 1.8873212044060184, - "grad_norm": 1.4172158241271973, - "learning_rate": 1.8544646593233024e-05, - "loss": 0.6885, - "step": 213490 - }, - { - "epoch": 1.8874096076663307, - "grad_norm": 8.146158218383789, - "learning_rate": 1.8543173205561152e-05, - "loss": 0.6146, - "step": 213500 - }, - { - "epoch": 1.887498010926643, - "grad_norm": 3.2238504886627197, - "learning_rate": 1.8541699817889284e-05, - "loss": 0.6524, - "step": 213510 - }, - { - "epoch": 1.8875864141869552, - "grad_norm": 2.985931873321533, - "learning_rate": 1.8540226430217413e-05, - "loss": 0.5647, - "step": 213520 - }, - { - "epoch": 1.8876748174472675, - "grad_norm": 2.8664724826812744, - "learning_rate": 1.853875304254554e-05, - "loss": 0.542, - "step": 213530 - }, - { - "epoch": 1.8877632207075798, - "grad_norm": 6.465889930725098, - "learning_rate": 1.8537279654873673e-05, - "loss": 0.6566, - "step": 213540 - }, - { - "epoch": 1.887851623967892, - "grad_norm": 17.41527557373047, - "learning_rate": 1.85358062672018e-05, - "loss": 0.5922, - "step": 213550 - }, - { - "epoch": 1.887940027228204, - "grad_norm": 2.6813340187072754, - "learning_rate": 1.853433287952993e-05, - "loss": 0.5266, - "step": 213560 - }, - { - "epoch": 1.8880284304885164, - "grad_norm": 2.6717472076416016, - "learning_rate": 1.853285949185806e-05, - "loss": 0.5942, - "step": 213570 - }, - { - "epoch": 1.8881168337488288, - "grad_norm": 1.4658883810043335, - "learning_rate": 1.853138610418619e-05, - "loss": 0.6243, - "step": 213580 - }, - { - "epoch": 1.888205237009141, - "grad_norm": 4.908879280090332, - "learning_rate": 1.8529912716514318e-05, - "loss": 0.5461, - "step": 213590 - }, - { - "epoch": 1.888293640269453, - "grad_norm": 8.682324409484863, - "learning_rate": 1.852843932884245e-05, - "loss": 0.555, - "step": 213600 - }, - { - "epoch": 1.8883820435297654, - "grad_norm": 3.517279624938965, - "learning_rate": 1.8526965941170578e-05, - "loss": 0.554, - "step": 213610 - }, - { - "epoch": 1.8884704467900777, - "grad_norm": 0.9724871516227722, - "learning_rate": 1.8525492553498706e-05, - "loss": 0.4883, - "step": 213620 - }, - { - "epoch": 1.8885588500503898, - "grad_norm": 4.339098930358887, - "learning_rate": 1.8524019165826838e-05, - "loss": 0.6278, - "step": 213630 - }, - { - "epoch": 1.888647253310702, - "grad_norm": 1.644022822380066, - "learning_rate": 1.8522545778154966e-05, - "loss": 0.7744, - "step": 213640 - }, - { - "epoch": 1.8887356565710145, - "grad_norm": 3.53022837638855, - "learning_rate": 1.8521072390483095e-05, - "loss": 0.549, - "step": 213650 - }, - { - "epoch": 1.8888240598313266, - "grad_norm": 1.4925897121429443, - "learning_rate": 1.8519599002811226e-05, - "loss": 0.6137, - "step": 213660 - }, - { - "epoch": 1.8889124630916387, - "grad_norm": 2.128223180770874, - "learning_rate": 1.8518125615139355e-05, - "loss": 0.6271, - "step": 213670 - }, - { - "epoch": 1.889000866351951, - "grad_norm": 5.156556606292725, - "learning_rate": 1.8516652227467483e-05, - "loss": 0.486, - "step": 213680 - }, - { - "epoch": 1.8890892696122634, - "grad_norm": 1.796038031578064, - "learning_rate": 1.8515178839795615e-05, - "loss": 0.534, - "step": 213690 - }, - { - "epoch": 1.8891776728725755, - "grad_norm": 3.3110363483428955, - "learning_rate": 1.8513705452123743e-05, - "loss": 0.424, - "step": 213700 - }, - { - "epoch": 1.8892660761328877, - "grad_norm": 2.35679292678833, - "learning_rate": 1.851223206445187e-05, - "loss": 0.5533, - "step": 213710 - }, - { - "epoch": 1.8893544793932, - "grad_norm": 2.322882890701294, - "learning_rate": 1.8510758676780003e-05, - "loss": 0.6785, - "step": 213720 - }, - { - "epoch": 1.8894428826535123, - "grad_norm": 2.069554090499878, - "learning_rate": 1.850928528910813e-05, - "loss": 0.6634, - "step": 213730 - }, - { - "epoch": 1.8895312859138245, - "grad_norm": 12.046005249023438, - "learning_rate": 1.850781190143626e-05, - "loss": 0.8362, - "step": 213740 - }, - { - "epoch": 1.8896196891741366, - "grad_norm": 3.466052293777466, - "learning_rate": 1.850633851376439e-05, - "loss": 0.5317, - "step": 213750 - }, - { - "epoch": 1.889708092434449, - "grad_norm": 4.101285934448242, - "learning_rate": 1.850486512609252e-05, - "loss": 0.6194, - "step": 213760 - }, - { - "epoch": 1.8897964956947613, - "grad_norm": 3.137982130050659, - "learning_rate": 1.850339173842065e-05, - "loss": 0.5138, - "step": 213770 - }, - { - "epoch": 1.8898848989550734, - "grad_norm": 1.803922176361084, - "learning_rate": 1.8501918350748777e-05, - "loss": 0.4798, - "step": 213780 - }, - { - "epoch": 1.8899733022153857, - "grad_norm": 2.399549961090088, - "learning_rate": 1.8500444963076905e-05, - "loss": 0.6397, - "step": 213790 - }, - { - "epoch": 1.890061705475698, - "grad_norm": 4.050331115722656, - "learning_rate": 1.8498971575405037e-05, - "loss": 0.6317, - "step": 213800 - }, - { - "epoch": 1.8901501087360102, - "grad_norm": 3.249687671661377, - "learning_rate": 1.8497498187733165e-05, - "loss": 0.5951, - "step": 213810 - }, - { - "epoch": 1.8902385119963223, - "grad_norm": 3.0217623710632324, - "learning_rate": 1.8496024800061294e-05, - "loss": 0.6623, - "step": 213820 - }, - { - "epoch": 1.8903269152566347, - "grad_norm": 1.2302578687667847, - "learning_rate": 1.8494551412389425e-05, - "loss": 0.6215, - "step": 213830 - }, - { - "epoch": 1.890415318516947, - "grad_norm": 1.431031584739685, - "learning_rate": 1.8493078024717554e-05, - "loss": 0.5895, - "step": 213840 - }, - { - "epoch": 1.8905037217772591, - "grad_norm": 1.6367743015289307, - "learning_rate": 1.8491604637045682e-05, - "loss": 0.5257, - "step": 213850 - }, - { - "epoch": 1.8905921250375712, - "grad_norm": 3.0997941493988037, - "learning_rate": 1.849013124937381e-05, - "loss": 0.6593, - "step": 213860 - }, - { - "epoch": 1.8906805282978836, - "grad_norm": 3.3189096450805664, - "learning_rate": 1.8488657861701942e-05, - "loss": 0.6237, - "step": 213870 - }, - { - "epoch": 1.890768931558196, - "grad_norm": 1.5525599718093872, - "learning_rate": 1.848718447403007e-05, - "loss": 0.5502, - "step": 213880 - }, - { - "epoch": 1.890857334818508, - "grad_norm": 3.968534231185913, - "learning_rate": 1.84857110863582e-05, - "loss": 0.6898, - "step": 213890 - }, - { - "epoch": 1.8909457380788204, - "grad_norm": 2.1128954887390137, - "learning_rate": 1.848423769868633e-05, - "loss": 0.6763, - "step": 213900 - }, - { - "epoch": 1.8910341413391327, - "grad_norm": 2.5275232791900635, - "learning_rate": 1.848276431101446e-05, - "loss": 0.6377, - "step": 213910 - }, - { - "epoch": 1.8911225445994448, - "grad_norm": 20.174570083618164, - "learning_rate": 1.8481290923342587e-05, - "loss": 0.641, - "step": 213920 - }, - { - "epoch": 1.891210947859757, - "grad_norm": 1.4616127014160156, - "learning_rate": 1.8479817535670716e-05, - "loss": 0.6087, - "step": 213930 - }, - { - "epoch": 1.8912993511200693, - "grad_norm": 2.63228440284729, - "learning_rate": 1.8478344147998847e-05, - "loss": 0.5483, - "step": 213940 - }, - { - "epoch": 1.8913877543803816, - "grad_norm": 1.7421663999557495, - "learning_rate": 1.8476870760326976e-05, - "loss": 0.5365, - "step": 213950 - }, - { - "epoch": 1.8914761576406938, - "grad_norm": 2.8034555912017822, - "learning_rate": 1.8475397372655104e-05, - "loss": 0.5906, - "step": 213960 - }, - { - "epoch": 1.8915645609010059, - "grad_norm": 1.3557761907577515, - "learning_rate": 1.8473923984983232e-05, - "loss": 0.5773, - "step": 213970 - }, - { - "epoch": 1.8916529641613182, - "grad_norm": 5.342085838317871, - "learning_rate": 1.8472450597311364e-05, - "loss": 0.4327, - "step": 213980 - }, - { - "epoch": 1.8917413674216306, - "grad_norm": 3.167161226272583, - "learning_rate": 1.8470977209639493e-05, - "loss": 0.6539, - "step": 213990 - }, - { - "epoch": 1.8918297706819427, - "grad_norm": 2.558377504348755, - "learning_rate": 1.846950382196762e-05, - "loss": 0.6095, - "step": 214000 - }, - { - "epoch": 1.891918173942255, - "grad_norm": 8.736021041870117, - "learning_rate": 1.8468030434295753e-05, - "loss": 0.5662, - "step": 214010 - }, - { - "epoch": 1.8920065772025674, - "grad_norm": 1.8692705631256104, - "learning_rate": 1.846655704662388e-05, - "loss": 0.5937, - "step": 214020 - }, - { - "epoch": 1.8920949804628795, - "grad_norm": 1.8699647188186646, - "learning_rate": 1.846508365895201e-05, - "loss": 0.5925, - "step": 214030 - }, - { - "epoch": 1.8921833837231916, - "grad_norm": 1.3535127639770508, - "learning_rate": 1.8463610271280138e-05, - "loss": 0.6094, - "step": 214040 - }, - { - "epoch": 1.892271786983504, - "grad_norm": 4.944995403289795, - "learning_rate": 1.846213688360827e-05, - "loss": 0.6154, - "step": 214050 - }, - { - "epoch": 1.8923601902438163, - "grad_norm": 4.698526859283447, - "learning_rate": 1.8460663495936398e-05, - "loss": 0.5083, - "step": 214060 - }, - { - "epoch": 1.8924485935041284, - "grad_norm": 1.3276554346084595, - "learning_rate": 1.8459190108264526e-05, - "loss": 0.5595, - "step": 214070 - }, - { - "epoch": 1.8925369967644405, - "grad_norm": 1.2327542304992676, - "learning_rate": 1.8457716720592658e-05, - "loss": 0.5325, - "step": 214080 - }, - { - "epoch": 1.8926254000247529, - "grad_norm": 1.654686689376831, - "learning_rate": 1.8456243332920786e-05, - "loss": 0.4257, - "step": 214090 - }, - { - "epoch": 1.8927138032850652, - "grad_norm": 2.887430429458618, - "learning_rate": 1.8454769945248915e-05, - "loss": 0.4465, - "step": 214100 - }, - { - "epoch": 1.8928022065453773, - "grad_norm": 1.4590517282485962, - "learning_rate": 1.8453296557577043e-05, - "loss": 0.4605, - "step": 214110 - }, - { - "epoch": 1.8928906098056897, - "grad_norm": 2.051868200302124, - "learning_rate": 1.8451823169905175e-05, - "loss": 0.641, - "step": 214120 - }, - { - "epoch": 1.892979013066002, - "grad_norm": 4.898158073425293, - "learning_rate": 1.8450349782233303e-05, - "loss": 0.5423, - "step": 214130 - }, - { - "epoch": 1.8930674163263141, - "grad_norm": 1.3424944877624512, - "learning_rate": 1.844887639456143e-05, - "loss": 0.6061, - "step": 214140 - }, - { - "epoch": 1.8931558195866263, - "grad_norm": 11.548501014709473, - "learning_rate": 1.844740300688956e-05, - "loss": 0.556, - "step": 214150 - }, - { - "epoch": 1.8932442228469386, - "grad_norm": 2.811197280883789, - "learning_rate": 1.844592961921769e-05, - "loss": 0.5081, - "step": 214160 - }, - { - "epoch": 1.893332626107251, - "grad_norm": 4.027631759643555, - "learning_rate": 1.844445623154582e-05, - "loss": 0.6443, - "step": 214170 - }, - { - "epoch": 1.893421029367563, - "grad_norm": 2.086879014968872, - "learning_rate": 1.8442982843873948e-05, - "loss": 0.6345, - "step": 214180 - }, - { - "epoch": 1.8935094326278752, - "grad_norm": 1.87912118434906, - "learning_rate": 1.844150945620208e-05, - "loss": 0.6381, - "step": 214190 - }, - { - "epoch": 1.8935978358881875, - "grad_norm": 9.014577865600586, - "learning_rate": 1.844003606853021e-05, - "loss": 0.6226, - "step": 214200 - }, - { - "epoch": 1.8936862391484999, - "grad_norm": 7.582598686218262, - "learning_rate": 1.8438562680858337e-05, - "loss": 0.6206, - "step": 214210 - }, - { - "epoch": 1.893774642408812, - "grad_norm": 1.9980887174606323, - "learning_rate": 1.8437089293186465e-05, - "loss": 0.6895, - "step": 214220 - }, - { - "epoch": 1.8938630456691241, - "grad_norm": 2.1076395511627197, - "learning_rate": 1.8435615905514597e-05, - "loss": 0.6247, - "step": 214230 - }, - { - "epoch": 1.8939514489294367, - "grad_norm": 1.788191556930542, - "learning_rate": 1.8434142517842725e-05, - "loss": 0.5424, - "step": 214240 - }, - { - "epoch": 1.8940398521897488, - "grad_norm": 2.757988214492798, - "learning_rate": 1.8432669130170854e-05, - "loss": 0.6193, - "step": 214250 - }, - { - "epoch": 1.894128255450061, - "grad_norm": 3.09478497505188, - "learning_rate": 1.8431195742498985e-05, - "loss": 0.487, - "step": 214260 - }, - { - "epoch": 1.8942166587103733, - "grad_norm": 2.8185760974884033, - "learning_rate": 1.8429722354827114e-05, - "loss": 0.6102, - "step": 214270 - }, - { - "epoch": 1.8943050619706856, - "grad_norm": 2.3654725551605225, - "learning_rate": 1.8428248967155242e-05, - "loss": 0.525, - "step": 214280 - }, - { - "epoch": 1.8943934652309977, - "grad_norm": 3.9461331367492676, - "learning_rate": 1.842677557948337e-05, - "loss": 0.5549, - "step": 214290 - }, - { - "epoch": 1.8944818684913098, - "grad_norm": 5.00105094909668, - "learning_rate": 1.8425302191811502e-05, - "loss": 0.5221, - "step": 214300 - }, - { - "epoch": 1.8945702717516222, - "grad_norm": 3.3477978706359863, - "learning_rate": 1.842382880413963e-05, - "loss": 0.6683, - "step": 214310 - }, - { - "epoch": 1.8946586750119345, - "grad_norm": 17.018566131591797, - "learning_rate": 1.842235541646776e-05, - "loss": 0.5197, - "step": 214320 - }, - { - "epoch": 1.8947470782722466, - "grad_norm": 2.96636700630188, - "learning_rate": 1.8420882028795887e-05, - "loss": 0.5335, - "step": 214330 - }, - { - "epoch": 1.8948354815325588, - "grad_norm": 17.807235717773438, - "learning_rate": 1.841940864112402e-05, - "loss": 0.5206, - "step": 214340 - }, - { - "epoch": 1.8949238847928713, - "grad_norm": 2.2948529720306396, - "learning_rate": 1.8417935253452147e-05, - "loss": 0.5527, - "step": 214350 - }, - { - "epoch": 1.8950122880531834, - "grad_norm": 1.9467228651046753, - "learning_rate": 1.8416461865780276e-05, - "loss": 0.6366, - "step": 214360 - }, - { - "epoch": 1.8951006913134956, - "grad_norm": 3.9395129680633545, - "learning_rate": 1.8414988478108407e-05, - "loss": 0.5505, - "step": 214370 - }, - { - "epoch": 1.895189094573808, - "grad_norm": 7.28964376449585, - "learning_rate": 1.8413515090436536e-05, - "loss": 0.5626, - "step": 214380 - }, - { - "epoch": 1.8952774978341203, - "grad_norm": 5.027669906616211, - "learning_rate": 1.8412041702764664e-05, - "loss": 0.5649, - "step": 214390 - }, - { - "epoch": 1.8953659010944324, - "grad_norm": 3.4936208724975586, - "learning_rate": 1.8410568315092792e-05, - "loss": 0.6373, - "step": 214400 - }, - { - "epoch": 1.8954543043547445, - "grad_norm": 3.587873697280884, - "learning_rate": 1.8409094927420924e-05, - "loss": 0.5265, - "step": 214410 - }, - { - "epoch": 1.8955427076150568, - "grad_norm": 2.6135525703430176, - "learning_rate": 1.8407621539749052e-05, - "loss": 0.524, - "step": 214420 - }, - { - "epoch": 1.8956311108753692, - "grad_norm": 1.8510371446609497, - "learning_rate": 1.840614815207718e-05, - "loss": 0.6583, - "step": 214430 - }, - { - "epoch": 1.8957195141356813, - "grad_norm": 5.618251800537109, - "learning_rate": 1.840467476440531e-05, - "loss": 0.5369, - "step": 214440 - }, - { - "epoch": 1.8958079173959934, - "grad_norm": 9.209452629089355, - "learning_rate": 1.840320137673344e-05, - "loss": 0.6332, - "step": 214450 - }, - { - "epoch": 1.8958963206563058, - "grad_norm": 2.0192315578460693, - "learning_rate": 1.840172798906157e-05, - "loss": 0.627, - "step": 214460 - }, - { - "epoch": 1.895984723916618, - "grad_norm": 1.686482548713684, - "learning_rate": 1.8400254601389698e-05, - "loss": 0.5642, - "step": 214470 - }, - { - "epoch": 1.8960731271769302, - "grad_norm": 1.6534452438354492, - "learning_rate": 1.839878121371783e-05, - "loss": 0.5933, - "step": 214480 - }, - { - "epoch": 1.8961615304372426, - "grad_norm": 2.584453582763672, - "learning_rate": 1.8397307826045958e-05, - "loss": 0.5834, - "step": 214490 - }, - { - "epoch": 1.896249933697555, - "grad_norm": 20.656749725341797, - "learning_rate": 1.8395834438374086e-05, - "loss": 0.5675, - "step": 214500 - }, - { - "epoch": 1.896338336957867, - "grad_norm": 4.196337699890137, - "learning_rate": 1.8394361050702218e-05, - "loss": 0.4978, - "step": 214510 - }, - { - "epoch": 1.8964267402181791, - "grad_norm": 5.75929594039917, - "learning_rate": 1.8392887663030346e-05, - "loss": 0.6418, - "step": 214520 - }, - { - "epoch": 1.8965151434784915, - "grad_norm": 2.27282452583313, - "learning_rate": 1.8391414275358475e-05, - "loss": 0.6251, - "step": 214530 - }, - { - "epoch": 1.8966035467388038, - "grad_norm": 2.5718026161193848, - "learning_rate": 1.8389940887686606e-05, - "loss": 0.5063, - "step": 214540 - }, - { - "epoch": 1.896691949999116, - "grad_norm": 9.644734382629395, - "learning_rate": 1.8388467500014735e-05, - "loss": 0.7092, - "step": 214550 - }, - { - "epoch": 1.896780353259428, - "grad_norm": 1.9957596063613892, - "learning_rate": 1.8386994112342863e-05, - "loss": 0.4627, - "step": 214560 - }, - { - "epoch": 1.8968687565197404, - "grad_norm": 2.2706170082092285, - "learning_rate": 1.8385520724670995e-05, - "loss": 0.3897, - "step": 214570 - }, - { - "epoch": 1.8969571597800527, - "grad_norm": 1.3469334840774536, - "learning_rate": 1.8384047336999123e-05, - "loss": 0.6298, - "step": 214580 - }, - { - "epoch": 1.8970455630403649, - "grad_norm": 1.8614156246185303, - "learning_rate": 1.838257394932725e-05, - "loss": 0.6533, - "step": 214590 - }, - { - "epoch": 1.8971339663006772, - "grad_norm": 1.2404825687408447, - "learning_rate": 1.8381100561655383e-05, - "loss": 0.5264, - "step": 214600 - }, - { - "epoch": 1.8972223695609896, - "grad_norm": 1.3020907640457153, - "learning_rate": 1.837962717398351e-05, - "loss": 0.5894, - "step": 214610 - }, - { - "epoch": 1.8973107728213017, - "grad_norm": 3.074523448944092, - "learning_rate": 1.837815378631164e-05, - "loss": 0.6098, - "step": 214620 - }, - { - "epoch": 1.8973991760816138, - "grad_norm": 5.36939811706543, - "learning_rate": 1.837668039863977e-05, - "loss": 0.4847, - "step": 214630 - }, - { - "epoch": 1.8974875793419261, - "grad_norm": 2.5673325061798096, - "learning_rate": 1.83752070109679e-05, - "loss": 0.7006, - "step": 214640 - }, - { - "epoch": 1.8975759826022385, - "grad_norm": 2.209733724594116, - "learning_rate": 1.837373362329603e-05, - "loss": 0.61, - "step": 214650 - }, - { - "epoch": 1.8976643858625506, - "grad_norm": 1.1435073614120483, - "learning_rate": 1.837226023562416e-05, - "loss": 0.4654, - "step": 214660 - }, - { - "epoch": 1.8977527891228627, - "grad_norm": 1.2431167364120483, - "learning_rate": 1.837078684795229e-05, - "loss": 0.7263, - "step": 214670 - }, - { - "epoch": 1.897841192383175, - "grad_norm": 1.6127747297286987, - "learning_rate": 1.8369313460280417e-05, - "loss": 0.6197, - "step": 214680 - }, - { - "epoch": 1.8979295956434874, - "grad_norm": 2.1076979637145996, - "learning_rate": 1.8367840072608545e-05, - "loss": 0.5103, - "step": 214690 - }, - { - "epoch": 1.8980179989037995, - "grad_norm": 1.5015486478805542, - "learning_rate": 1.8366366684936677e-05, - "loss": 0.4705, - "step": 214700 - }, - { - "epoch": 1.8981064021641119, - "grad_norm": 10.76135540008545, - "learning_rate": 1.8364893297264805e-05, - "loss": 0.5346, - "step": 214710 - }, - { - "epoch": 1.8981948054244242, - "grad_norm": 8.669085502624512, - "learning_rate": 1.8363419909592934e-05, - "loss": 0.5063, - "step": 214720 - }, - { - "epoch": 1.8982832086847363, - "grad_norm": 3.8375988006591797, - "learning_rate": 1.8361946521921065e-05, - "loss": 0.5008, - "step": 214730 - }, - { - "epoch": 1.8983716119450484, - "grad_norm": 1.3984936475753784, - "learning_rate": 1.8360473134249194e-05, - "loss": 0.5863, - "step": 214740 - }, - { - "epoch": 1.8984600152053608, - "grad_norm": 7.372394561767578, - "learning_rate": 1.8358999746577322e-05, - "loss": 0.6715, - "step": 214750 - }, - { - "epoch": 1.8985484184656731, - "grad_norm": 1.0428462028503418, - "learning_rate": 1.835752635890545e-05, - "loss": 0.4896, - "step": 214760 - }, - { - "epoch": 1.8986368217259852, - "grad_norm": 14.690001487731934, - "learning_rate": 1.8356052971233582e-05, - "loss": 0.6821, - "step": 214770 - }, - { - "epoch": 1.8987252249862974, - "grad_norm": 14.343873023986816, - "learning_rate": 1.835457958356171e-05, - "loss": 0.5994, - "step": 214780 - }, - { - "epoch": 1.8988136282466097, - "grad_norm": 2.0612404346466064, - "learning_rate": 1.835310619588984e-05, - "loss": 0.4487, - "step": 214790 - }, - { - "epoch": 1.898902031506922, - "grad_norm": 2.703902006149292, - "learning_rate": 1.8351632808217967e-05, - "loss": 0.5163, - "step": 214800 - }, - { - "epoch": 1.8989904347672342, - "grad_norm": 1.1653752326965332, - "learning_rate": 1.83501594205461e-05, - "loss": 0.5916, - "step": 214810 - }, - { - "epoch": 1.8990788380275463, - "grad_norm": 2.3188114166259766, - "learning_rate": 1.8348686032874227e-05, - "loss": 0.6301, - "step": 214820 - }, - { - "epoch": 1.8991672412878589, - "grad_norm": 1.8194137811660767, - "learning_rate": 1.8347212645202356e-05, - "loss": 0.4594, - "step": 214830 - }, - { - "epoch": 1.899255644548171, - "grad_norm": 1.9201371669769287, - "learning_rate": 1.8345739257530487e-05, - "loss": 0.5383, - "step": 214840 - }, - { - "epoch": 1.899344047808483, - "grad_norm": 5.971470355987549, - "learning_rate": 1.8344265869858616e-05, - "loss": 0.5505, - "step": 214850 - }, - { - "epoch": 1.8994324510687954, - "grad_norm": 3.129467248916626, - "learning_rate": 1.8342792482186744e-05, - "loss": 0.6249, - "step": 214860 - }, - { - "epoch": 1.8995208543291078, - "grad_norm": 6.293489456176758, - "learning_rate": 1.8341319094514872e-05, - "loss": 0.7571, - "step": 214870 - }, - { - "epoch": 1.89960925758942, - "grad_norm": 5.534122467041016, - "learning_rate": 1.8339845706843004e-05, - "loss": 0.5868, - "step": 214880 - }, - { - "epoch": 1.899697660849732, - "grad_norm": 1.0550578832626343, - "learning_rate": 1.8338372319171133e-05, - "loss": 0.53, - "step": 214890 - }, - { - "epoch": 1.8997860641100444, - "grad_norm": 1.8364028930664062, - "learning_rate": 1.833689893149926e-05, - "loss": 0.6311, - "step": 214900 - }, - { - "epoch": 1.8998744673703567, - "grad_norm": 5.171708583831787, - "learning_rate": 1.833542554382739e-05, - "loss": 0.5155, - "step": 214910 - }, - { - "epoch": 1.8999628706306688, - "grad_norm": 1.2820627689361572, - "learning_rate": 1.833395215615552e-05, - "loss": 0.5437, - "step": 214920 - }, - { - "epoch": 1.900051273890981, - "grad_norm": 11.707216262817383, - "learning_rate": 1.833247876848365e-05, - "loss": 0.5461, - "step": 214930 - }, - { - "epoch": 1.9001396771512935, - "grad_norm": 5.4688005447387695, - "learning_rate": 1.8331005380811778e-05, - "loss": 0.4533, - "step": 214940 - }, - { - "epoch": 1.9002280804116056, - "grad_norm": 2.838073253631592, - "learning_rate": 1.832953199313991e-05, - "loss": 0.4696, - "step": 214950 - }, - { - "epoch": 1.9003164836719177, - "grad_norm": 1.4912468194961548, - "learning_rate": 1.8328058605468038e-05, - "loss": 0.5507, - "step": 214960 - }, - { - "epoch": 1.90040488693223, - "grad_norm": 2.4036059379577637, - "learning_rate": 1.8326585217796166e-05, - "loss": 0.5545, - "step": 214970 - }, - { - "epoch": 1.9004932901925424, - "grad_norm": 2.126760482788086, - "learning_rate": 1.8325111830124294e-05, - "loss": 0.6015, - "step": 214980 - }, - { - "epoch": 1.9005816934528545, - "grad_norm": 4.676766872406006, - "learning_rate": 1.8323638442452426e-05, - "loss": 0.6376, - "step": 214990 - }, - { - "epoch": 1.9006700967131667, - "grad_norm": 12.944665908813477, - "learning_rate": 1.8322165054780555e-05, - "loss": 0.6542, - "step": 215000 - }, - { - "epoch": 1.900758499973479, - "grad_norm": 1.8676060438156128, - "learning_rate": 1.8320691667108683e-05, - "loss": 0.4389, - "step": 215010 - }, - { - "epoch": 1.9008469032337914, - "grad_norm": 2.112060070037842, - "learning_rate": 1.8319218279436815e-05, - "loss": 0.4921, - "step": 215020 - }, - { - "epoch": 1.9009353064941035, - "grad_norm": 1.8731809854507446, - "learning_rate": 1.8317744891764943e-05, - "loss": 0.5691, - "step": 215030 - }, - { - "epoch": 1.9010237097544156, - "grad_norm": 2.719052791595459, - "learning_rate": 1.831627150409307e-05, - "loss": 0.6618, - "step": 215040 - }, - { - "epoch": 1.901112113014728, - "grad_norm": 8.77170467376709, - "learning_rate": 1.83147981164212e-05, - "loss": 0.536, - "step": 215050 - }, - { - "epoch": 1.9012005162750403, - "grad_norm": 1.5281165838241577, - "learning_rate": 1.831332472874933e-05, - "loss": 0.4545, - "step": 215060 - }, - { - "epoch": 1.9012889195353524, - "grad_norm": 2.235581159591675, - "learning_rate": 1.831185134107746e-05, - "loss": 0.6655, - "step": 215070 - }, - { - "epoch": 1.9013773227956647, - "grad_norm": 2.4700381755828857, - "learning_rate": 1.8310377953405588e-05, - "loss": 0.5337, - "step": 215080 - }, - { - "epoch": 1.901465726055977, - "grad_norm": 2.260410785675049, - "learning_rate": 1.8308904565733717e-05, - "loss": 0.6076, - "step": 215090 - }, - { - "epoch": 1.9015541293162892, - "grad_norm": 2.535979747772217, - "learning_rate": 1.8307431178061848e-05, - "loss": 0.6101, - "step": 215100 - }, - { - "epoch": 1.9016425325766013, - "grad_norm": 1.87662672996521, - "learning_rate": 1.8305957790389977e-05, - "loss": 0.4419, - "step": 215110 - }, - { - "epoch": 1.9017309358369137, - "grad_norm": 2.4027507305145264, - "learning_rate": 1.8304484402718105e-05, - "loss": 0.5982, - "step": 215120 - }, - { - "epoch": 1.901819339097226, - "grad_norm": 4.070061683654785, - "learning_rate": 1.8303011015046237e-05, - "loss": 0.399, - "step": 215130 - }, - { - "epoch": 1.9019077423575381, - "grad_norm": 5.777319431304932, - "learning_rate": 1.8301537627374365e-05, - "loss": 0.5812, - "step": 215140 - }, - { - "epoch": 1.9019961456178502, - "grad_norm": 0.7077469825744629, - "learning_rate": 1.8300064239702493e-05, - "loss": 0.5997, - "step": 215150 - }, - { - "epoch": 1.9020845488781626, - "grad_norm": 6.09249210357666, - "learning_rate": 1.8298590852030622e-05, - "loss": 0.7451, - "step": 215160 - }, - { - "epoch": 1.902172952138475, - "grad_norm": 2.2320213317871094, - "learning_rate": 1.8297117464358754e-05, - "loss": 0.518, - "step": 215170 - }, - { - "epoch": 1.902261355398787, - "grad_norm": 1.11752188205719, - "learning_rate": 1.8295644076686882e-05, - "loss": 0.5834, - "step": 215180 - }, - { - "epoch": 1.9023497586590994, - "grad_norm": 2.3647751808166504, - "learning_rate": 1.829417068901501e-05, - "loss": 0.7619, - "step": 215190 - }, - { - "epoch": 1.9024381619194117, - "grad_norm": 4.228518009185791, - "learning_rate": 1.8292697301343142e-05, - "loss": 0.569, - "step": 215200 - }, - { - "epoch": 1.9025265651797238, - "grad_norm": 1.678530216217041, - "learning_rate": 1.829122391367127e-05, - "loss": 0.5302, - "step": 215210 - }, - { - "epoch": 1.902614968440036, - "grad_norm": 2.062027931213379, - "learning_rate": 1.82897505259994e-05, - "loss": 0.5292, - "step": 215220 - }, - { - "epoch": 1.9027033717003483, - "grad_norm": 6.109335422515869, - "learning_rate": 1.8288277138327527e-05, - "loss": 0.6924, - "step": 215230 - }, - { - "epoch": 1.9027917749606607, - "grad_norm": 2.0648276805877686, - "learning_rate": 1.828680375065566e-05, - "loss": 0.5308, - "step": 215240 - }, - { - "epoch": 1.9028801782209728, - "grad_norm": 1.5512999296188354, - "learning_rate": 1.8285330362983787e-05, - "loss": 0.4996, - "step": 215250 - }, - { - "epoch": 1.902968581481285, - "grad_norm": 7.731812953948975, - "learning_rate": 1.8283856975311916e-05, - "loss": 0.6653, - "step": 215260 - }, - { - "epoch": 1.9030569847415972, - "grad_norm": 7.800272464752197, - "learning_rate": 1.8282383587640044e-05, - "loss": 0.6671, - "step": 215270 - }, - { - "epoch": 1.9031453880019096, - "grad_norm": 1.48674476146698, - "learning_rate": 1.8280910199968176e-05, - "loss": 0.4932, - "step": 215280 - }, - { - "epoch": 1.9032337912622217, - "grad_norm": 2.0727829933166504, - "learning_rate": 1.8279436812296304e-05, - "loss": 0.5375, - "step": 215290 - }, - { - "epoch": 1.903322194522534, - "grad_norm": 2.819892644882202, - "learning_rate": 1.8277963424624432e-05, - "loss": 0.5211, - "step": 215300 - }, - { - "epoch": 1.9034105977828464, - "grad_norm": 3.371164321899414, - "learning_rate": 1.8276490036952564e-05, - "loss": 0.4364, - "step": 215310 - }, - { - "epoch": 1.9034990010431585, - "grad_norm": 3.080634355545044, - "learning_rate": 1.8275016649280692e-05, - "loss": 0.6259, - "step": 215320 - }, - { - "epoch": 1.9035874043034706, - "grad_norm": 2.3610153198242188, - "learning_rate": 1.827354326160882e-05, - "loss": 0.5589, - "step": 215330 - }, - { - "epoch": 1.903675807563783, - "grad_norm": 4.80729866027832, - "learning_rate": 1.827206987393695e-05, - "loss": 0.6519, - "step": 215340 - }, - { - "epoch": 1.9037642108240953, - "grad_norm": 1.9493257999420166, - "learning_rate": 1.827059648626508e-05, - "loss": 0.5813, - "step": 215350 - }, - { - "epoch": 1.9038526140844074, - "grad_norm": 2.0165605545043945, - "learning_rate": 1.826912309859321e-05, - "loss": 0.5949, - "step": 215360 - }, - { - "epoch": 1.9039410173447195, - "grad_norm": 2.412551164627075, - "learning_rate": 1.8267649710921338e-05, - "loss": 0.5228, - "step": 215370 - }, - { - "epoch": 1.9040294206050319, - "grad_norm": 3.516951322555542, - "learning_rate": 1.826617632324947e-05, - "loss": 0.5421, - "step": 215380 - }, - { - "epoch": 1.9041178238653442, - "grad_norm": 3.9508211612701416, - "learning_rate": 1.8264702935577598e-05, - "loss": 0.5422, - "step": 215390 - }, - { - "epoch": 1.9042062271256563, - "grad_norm": 4.987118244171143, - "learning_rate": 1.8263229547905726e-05, - "loss": 0.5906, - "step": 215400 - }, - { - "epoch": 1.9042946303859687, - "grad_norm": 2.879135847091675, - "learning_rate": 1.8261756160233854e-05, - "loss": 0.5828, - "step": 215410 - }, - { - "epoch": 1.904383033646281, - "grad_norm": 3.213564395904541, - "learning_rate": 1.8260282772561986e-05, - "loss": 0.5133, - "step": 215420 - }, - { - "epoch": 1.9044714369065932, - "grad_norm": 12.460219383239746, - "learning_rate": 1.8258809384890114e-05, - "loss": 0.6821, - "step": 215430 - }, - { - "epoch": 1.9045598401669053, - "grad_norm": 3.8772828578948975, - "learning_rate": 1.8257335997218243e-05, - "loss": 0.6225, - "step": 215440 - }, - { - "epoch": 1.9046482434272176, - "grad_norm": 3.252964735031128, - "learning_rate": 1.8255862609546375e-05, - "loss": 0.5598, - "step": 215450 - }, - { - "epoch": 1.90473664668753, - "grad_norm": 5.064020156860352, - "learning_rate": 1.8254389221874503e-05, - "loss": 0.4994, - "step": 215460 - }, - { - "epoch": 1.904825049947842, - "grad_norm": 1.0315037965774536, - "learning_rate": 1.825291583420263e-05, - "loss": 0.5049, - "step": 215470 - }, - { - "epoch": 1.9049134532081542, - "grad_norm": 3.551647663116455, - "learning_rate": 1.8251442446530763e-05, - "loss": 0.7413, - "step": 215480 - }, - { - "epoch": 1.9050018564684665, - "grad_norm": 1.5396426916122437, - "learning_rate": 1.824996905885889e-05, - "loss": 0.6819, - "step": 215490 - }, - { - "epoch": 1.9050902597287789, - "grad_norm": 10.421950340270996, - "learning_rate": 1.824849567118702e-05, - "loss": 0.6113, - "step": 215500 - }, - { - "epoch": 1.905178662989091, - "grad_norm": 2.161991834640503, - "learning_rate": 1.824702228351515e-05, - "loss": 0.5494, - "step": 215510 - }, - { - "epoch": 1.9052670662494031, - "grad_norm": 2.80000638961792, - "learning_rate": 1.824554889584328e-05, - "loss": 0.5843, - "step": 215520 - }, - { - "epoch": 1.9053554695097157, - "grad_norm": 0.9141148328781128, - "learning_rate": 1.824407550817141e-05, - "loss": 0.5035, - "step": 215530 - }, - { - "epoch": 1.9054438727700278, - "grad_norm": 1.3247166872024536, - "learning_rate": 1.824260212049954e-05, - "loss": 0.5271, - "step": 215540 - }, - { - "epoch": 1.90553227603034, - "grad_norm": 1.809760332107544, - "learning_rate": 1.8241128732827668e-05, - "loss": 0.5927, - "step": 215550 - }, - { - "epoch": 1.9056206792906523, - "grad_norm": 4.684435844421387, - "learning_rate": 1.8239655345155797e-05, - "loss": 0.5268, - "step": 215560 - }, - { - "epoch": 1.9057090825509646, - "grad_norm": 1.1501007080078125, - "learning_rate": 1.823818195748393e-05, - "loss": 0.5675, - "step": 215570 - }, - { - "epoch": 1.9057974858112767, - "grad_norm": 5.479037761688232, - "learning_rate": 1.8236708569812057e-05, - "loss": 0.7386, - "step": 215580 - }, - { - "epoch": 1.9058858890715888, - "grad_norm": 1.9849950075149536, - "learning_rate": 1.8235235182140185e-05, - "loss": 0.6442, - "step": 215590 - }, - { - "epoch": 1.9059742923319012, - "grad_norm": 5.336343765258789, - "learning_rate": 1.8233761794468317e-05, - "loss": 0.5115, - "step": 215600 - }, - { - "epoch": 1.9060626955922135, - "grad_norm": 2.0101168155670166, - "learning_rate": 1.8232288406796445e-05, - "loss": 0.6037, - "step": 215610 - }, - { - "epoch": 1.9061510988525256, - "grad_norm": 2.029799222946167, - "learning_rate": 1.8230815019124574e-05, - "loss": 0.5625, - "step": 215620 - }, - { - "epoch": 1.9062395021128378, - "grad_norm": 2.2444686889648438, - "learning_rate": 1.8229341631452702e-05, - "loss": 0.6678, - "step": 215630 - }, - { - "epoch": 1.9063279053731501, - "grad_norm": 1.8639298677444458, - "learning_rate": 1.8227868243780834e-05, - "loss": 0.6664, - "step": 215640 - }, - { - "epoch": 1.9064163086334625, - "grad_norm": 2.760430097579956, - "learning_rate": 1.8226394856108962e-05, - "loss": 0.6056, - "step": 215650 - }, - { - "epoch": 1.9065047118937746, - "grad_norm": 9.554108619689941, - "learning_rate": 1.822492146843709e-05, - "loss": 0.5387, - "step": 215660 - }, - { - "epoch": 1.906593115154087, - "grad_norm": 5.9488630294799805, - "learning_rate": 1.8223448080765222e-05, - "loss": 0.6218, - "step": 215670 - }, - { - "epoch": 1.9066815184143993, - "grad_norm": 2.389159917831421, - "learning_rate": 1.822197469309335e-05, - "loss": 0.5919, - "step": 215680 - }, - { - "epoch": 1.9067699216747114, - "grad_norm": 1.4990259408950806, - "learning_rate": 1.822050130542148e-05, - "loss": 0.4699, - "step": 215690 - }, - { - "epoch": 1.9068583249350235, - "grad_norm": 4.160340309143066, - "learning_rate": 1.8219027917749607e-05, - "loss": 0.502, - "step": 215700 - }, - { - "epoch": 1.9069467281953358, - "grad_norm": 3.2409324645996094, - "learning_rate": 1.821755453007774e-05, - "loss": 0.6041, - "step": 215710 - }, - { - "epoch": 1.9070351314556482, - "grad_norm": 3.227299451828003, - "learning_rate": 1.8216081142405867e-05, - "loss": 0.5844, - "step": 215720 - }, - { - "epoch": 1.9071235347159603, - "grad_norm": 1.404015064239502, - "learning_rate": 1.8214607754733996e-05, - "loss": 0.5235, - "step": 215730 - }, - { - "epoch": 1.9072119379762724, - "grad_norm": 7.0780720710754395, - "learning_rate": 1.8213134367062124e-05, - "loss": 0.5693, - "step": 215740 - }, - { - "epoch": 1.9073003412365848, - "grad_norm": 2.1195764541625977, - "learning_rate": 1.8211660979390256e-05, - "loss": 0.535, - "step": 215750 - }, - { - "epoch": 1.907388744496897, - "grad_norm": 0.7465678453445435, - "learning_rate": 1.8210187591718384e-05, - "loss": 0.493, - "step": 215760 - }, - { - "epoch": 1.9074771477572092, - "grad_norm": 4.116894245147705, - "learning_rate": 1.8208714204046512e-05, - "loss": 0.5397, - "step": 215770 - }, - { - "epoch": 1.9075655510175216, - "grad_norm": 1.049372673034668, - "learning_rate": 1.8207240816374644e-05, - "loss": 0.4521, - "step": 215780 - }, - { - "epoch": 1.907653954277834, - "grad_norm": 2.7427937984466553, - "learning_rate": 1.8205767428702772e-05, - "loss": 0.6194, - "step": 215790 - }, - { - "epoch": 1.907742357538146, - "grad_norm": 3.264143228530884, - "learning_rate": 1.82042940410309e-05, - "loss": 0.6236, - "step": 215800 - }, - { - "epoch": 1.9078307607984581, - "grad_norm": 3.0359134674072266, - "learning_rate": 1.820282065335903e-05, - "loss": 0.5412, - "step": 215810 - }, - { - "epoch": 1.9079191640587705, - "grad_norm": 11.858660697937012, - "learning_rate": 1.820134726568716e-05, - "loss": 0.5521, - "step": 215820 - }, - { - "epoch": 1.9080075673190828, - "grad_norm": 2.638056755065918, - "learning_rate": 1.819987387801529e-05, - "loss": 0.6447, - "step": 215830 - }, - { - "epoch": 1.908095970579395, - "grad_norm": 1.1822389364242554, - "learning_rate": 1.8198400490343418e-05, - "loss": 0.4912, - "step": 215840 - }, - { - "epoch": 1.908184373839707, - "grad_norm": 1.999218463897705, - "learning_rate": 1.819692710267155e-05, - "loss": 0.3623, - "step": 215850 - }, - { - "epoch": 1.9082727771000194, - "grad_norm": 1.8566445112228394, - "learning_rate": 1.8195453714999678e-05, - "loss": 0.5936, - "step": 215860 - }, - { - "epoch": 1.9083611803603318, - "grad_norm": 1.7797869443893433, - "learning_rate": 1.8193980327327806e-05, - "loss": 0.6518, - "step": 215870 - }, - { - "epoch": 1.9084495836206439, - "grad_norm": 4.77926778793335, - "learning_rate": 1.8192506939655934e-05, - "loss": 0.5155, - "step": 215880 - }, - { - "epoch": 1.9085379868809562, - "grad_norm": 2.640517473220825, - "learning_rate": 1.8191033551984066e-05, - "loss": 0.6423, - "step": 215890 - }, - { - "epoch": 1.9086263901412686, - "grad_norm": 4.8330841064453125, - "learning_rate": 1.8189560164312195e-05, - "loss": 0.5164, - "step": 215900 - }, - { - "epoch": 1.9087147934015807, - "grad_norm": 2.211228609085083, - "learning_rate": 1.8188086776640323e-05, - "loss": 0.5938, - "step": 215910 - }, - { - "epoch": 1.9088031966618928, - "grad_norm": 3.9917683601379395, - "learning_rate": 1.818661338896845e-05, - "loss": 0.5885, - "step": 215920 - }, - { - "epoch": 1.9088915999222051, - "grad_norm": 4.436807155609131, - "learning_rate": 1.8185140001296583e-05, - "loss": 0.6072, - "step": 215930 - }, - { - "epoch": 1.9089800031825175, - "grad_norm": 1.3593721389770508, - "learning_rate": 1.818366661362471e-05, - "loss": 0.5533, - "step": 215940 - }, - { - "epoch": 1.9090684064428296, - "grad_norm": 8.200021743774414, - "learning_rate": 1.818219322595284e-05, - "loss": 0.5177, - "step": 215950 - }, - { - "epoch": 1.9091568097031417, - "grad_norm": 2.670382022857666, - "learning_rate": 1.818071983828097e-05, - "loss": 0.541, - "step": 215960 - }, - { - "epoch": 1.909245212963454, - "grad_norm": 1.8656916618347168, - "learning_rate": 1.81792464506091e-05, - "loss": 0.6579, - "step": 215970 - }, - { - "epoch": 1.9093336162237664, - "grad_norm": 3.47935152053833, - "learning_rate": 1.8177773062937228e-05, - "loss": 0.561, - "step": 215980 - }, - { - "epoch": 1.9094220194840785, - "grad_norm": 2.670969247817993, - "learning_rate": 1.8176299675265356e-05, - "loss": 0.5686, - "step": 215990 - }, - { - "epoch": 1.9095104227443909, - "grad_norm": 2.4435932636260986, - "learning_rate": 1.8174826287593488e-05, - "loss": 0.588, - "step": 216000 - }, - { - "epoch": 1.9095988260047032, - "grad_norm": 2.103586435317993, - "learning_rate": 1.8173352899921617e-05, - "loss": 0.5022, - "step": 216010 - }, - { - "epoch": 1.9096872292650153, - "grad_norm": 6.973981857299805, - "learning_rate": 1.8171879512249745e-05, - "loss": 0.6578, - "step": 216020 - }, - { - "epoch": 1.9097756325253274, - "grad_norm": 0.8850047588348389, - "learning_rate": 1.8170406124577873e-05, - "loss": 0.7316, - "step": 216030 - }, - { - "epoch": 1.9098640357856398, - "grad_norm": 1.660649299621582, - "learning_rate": 1.8168932736906005e-05, - "loss": 0.544, - "step": 216040 - }, - { - "epoch": 1.9099524390459521, - "grad_norm": 4.22000789642334, - "learning_rate": 1.8167459349234133e-05, - "loss": 0.6118, - "step": 216050 - }, - { - "epoch": 1.9100408423062643, - "grad_norm": 3.014322280883789, - "learning_rate": 1.8165985961562262e-05, - "loss": 0.5632, - "step": 216060 - }, - { - "epoch": 1.9101292455665764, - "grad_norm": 4.061424255371094, - "learning_rate": 1.8164512573890393e-05, - "loss": 0.6439, - "step": 216070 - }, - { - "epoch": 1.9102176488268887, - "grad_norm": 2.7834503650665283, - "learning_rate": 1.8163039186218522e-05, - "loss": 0.6515, - "step": 216080 - }, - { - "epoch": 1.910306052087201, - "grad_norm": 2.2243871688842773, - "learning_rate": 1.816156579854665e-05, - "loss": 0.6516, - "step": 216090 - }, - { - "epoch": 1.9103944553475132, - "grad_norm": 8.861047744750977, - "learning_rate": 1.816009241087478e-05, - "loss": 0.6391, - "step": 216100 - }, - { - "epoch": 1.9104828586078253, - "grad_norm": 3.172020196914673, - "learning_rate": 1.815861902320291e-05, - "loss": 0.6371, - "step": 216110 - }, - { - "epoch": 1.9105712618681379, - "grad_norm": 11.315186500549316, - "learning_rate": 1.815714563553104e-05, - "loss": 0.5799, - "step": 216120 - }, - { - "epoch": 1.91065966512845, - "grad_norm": 1.1870808601379395, - "learning_rate": 1.8155672247859167e-05, - "loss": 0.5505, - "step": 216130 - }, - { - "epoch": 1.910748068388762, - "grad_norm": 2.998934507369995, - "learning_rate": 1.81541988601873e-05, - "loss": 0.6784, - "step": 216140 - }, - { - "epoch": 1.9108364716490744, - "grad_norm": 1.2750850915908813, - "learning_rate": 1.8152725472515427e-05, - "loss": 0.4746, - "step": 216150 - }, - { - "epoch": 1.9109248749093868, - "grad_norm": 1.8433935642242432, - "learning_rate": 1.8151252084843555e-05, - "loss": 0.5013, - "step": 216160 - }, - { - "epoch": 1.911013278169699, - "grad_norm": 1.366038203239441, - "learning_rate": 1.8149778697171684e-05, - "loss": 0.5223, - "step": 216170 - }, - { - "epoch": 1.911101681430011, - "grad_norm": 2.91780686378479, - "learning_rate": 1.8148305309499816e-05, - "loss": 0.6686, - "step": 216180 - }, - { - "epoch": 1.9111900846903234, - "grad_norm": 1.7641998529434204, - "learning_rate": 1.8146831921827944e-05, - "loss": 0.4539, - "step": 216190 - }, - { - "epoch": 1.9112784879506357, - "grad_norm": 1.4607517719268799, - "learning_rate": 1.8145358534156072e-05, - "loss": 0.6388, - "step": 216200 - }, - { - "epoch": 1.9113668912109478, - "grad_norm": 11.093215942382812, - "learning_rate": 1.81438851464842e-05, - "loss": 0.4907, - "step": 216210 - }, - { - "epoch": 1.91145529447126, - "grad_norm": 7.174412727355957, - "learning_rate": 1.8142411758812332e-05, - "loss": 0.6713, - "step": 216220 - }, - { - "epoch": 1.9115436977315723, - "grad_norm": 2.491236448287964, - "learning_rate": 1.814093837114046e-05, - "loss": 0.467, - "step": 216230 - }, - { - "epoch": 1.9116321009918846, - "grad_norm": 8.530804634094238, - "learning_rate": 1.813946498346859e-05, - "loss": 0.5929, - "step": 216240 - }, - { - "epoch": 1.9117205042521968, - "grad_norm": 1.5822875499725342, - "learning_rate": 1.813799159579672e-05, - "loss": 0.5307, - "step": 216250 - }, - { - "epoch": 1.911808907512509, - "grad_norm": 18.681421279907227, - "learning_rate": 1.813651820812485e-05, - "loss": 0.5232, - "step": 216260 - }, - { - "epoch": 1.9118973107728214, - "grad_norm": 1.1326762437820435, - "learning_rate": 1.8135044820452978e-05, - "loss": 0.5557, - "step": 216270 - }, - { - "epoch": 1.9119857140331336, - "grad_norm": 6.438721179962158, - "learning_rate": 1.8133571432781106e-05, - "loss": 0.6472, - "step": 216280 - }, - { - "epoch": 1.9120741172934457, - "grad_norm": 1.6215764284133911, - "learning_rate": 1.8132098045109238e-05, - "loss": 0.5147, - "step": 216290 - }, - { - "epoch": 1.912162520553758, - "grad_norm": 1.7961734533309937, - "learning_rate": 1.8130624657437366e-05, - "loss": 0.6435, - "step": 216300 - }, - { - "epoch": 1.9122509238140704, - "grad_norm": 1.5676223039627075, - "learning_rate": 1.8129151269765494e-05, - "loss": 0.5802, - "step": 216310 - }, - { - "epoch": 1.9123393270743825, - "grad_norm": 2.6900227069854736, - "learning_rate": 1.8127677882093626e-05, - "loss": 0.5485, - "step": 216320 - }, - { - "epoch": 1.9124277303346946, - "grad_norm": 15.534506797790527, - "learning_rate": 1.8126204494421754e-05, - "loss": 0.4505, - "step": 216330 - }, - { - "epoch": 1.912516133595007, - "grad_norm": 15.729284286499023, - "learning_rate": 1.8124731106749883e-05, - "loss": 0.7008, - "step": 216340 - }, - { - "epoch": 1.9126045368553193, - "grad_norm": 2.357236623764038, - "learning_rate": 1.8123257719078015e-05, - "loss": 0.5313, - "step": 216350 - }, - { - "epoch": 1.9126929401156314, - "grad_norm": 2.2297756671905518, - "learning_rate": 1.8121784331406143e-05, - "loss": 0.6155, - "step": 216360 - }, - { - "epoch": 1.9127813433759437, - "grad_norm": 1.433730959892273, - "learning_rate": 1.812031094373427e-05, - "loss": 0.5865, - "step": 216370 - }, - { - "epoch": 1.912869746636256, - "grad_norm": 1.051483392715454, - "learning_rate": 1.8118837556062403e-05, - "loss": 0.4809, - "step": 216380 - }, - { - "epoch": 1.9129581498965682, - "grad_norm": 2.712449073791504, - "learning_rate": 1.811736416839053e-05, - "loss": 0.572, - "step": 216390 - }, - { - "epoch": 1.9130465531568803, - "grad_norm": 1.288305640220642, - "learning_rate": 1.811589078071866e-05, - "loss": 0.5041, - "step": 216400 - }, - { - "epoch": 1.9131349564171927, - "grad_norm": 3.086259126663208, - "learning_rate": 1.811441739304679e-05, - "loss": 0.5439, - "step": 216410 - }, - { - "epoch": 1.913223359677505, - "grad_norm": 2.446774482727051, - "learning_rate": 1.811294400537492e-05, - "loss": 0.5209, - "step": 216420 - }, - { - "epoch": 1.9133117629378171, - "grad_norm": 2.3269996643066406, - "learning_rate": 1.8111470617703048e-05, - "loss": 0.4758, - "step": 216430 - }, - { - "epoch": 1.9134001661981292, - "grad_norm": 1.0755128860473633, - "learning_rate": 1.810999723003118e-05, - "loss": 0.607, - "step": 216440 - }, - { - "epoch": 1.9134885694584416, - "grad_norm": 3.610089063644409, - "learning_rate": 1.8108523842359308e-05, - "loss": 0.5732, - "step": 216450 - }, - { - "epoch": 1.913576972718754, - "grad_norm": 2.4240448474884033, - "learning_rate": 1.8107050454687437e-05, - "loss": 0.5613, - "step": 216460 - }, - { - "epoch": 1.913665375979066, - "grad_norm": 0.6894088387489319, - "learning_rate": 1.810557706701557e-05, - "loss": 0.4501, - "step": 216470 - }, - { - "epoch": 1.9137537792393784, - "grad_norm": 7.4250874519348145, - "learning_rate": 1.8104103679343697e-05, - "loss": 0.5233, - "step": 216480 - }, - { - "epoch": 1.9138421824996907, - "grad_norm": 1.0874172449111938, - "learning_rate": 1.8102630291671825e-05, - "loss": 0.525, - "step": 216490 - }, - { - "epoch": 1.9139305857600029, - "grad_norm": 1.8278764486312866, - "learning_rate": 1.8101156903999953e-05, - "loss": 0.6159, - "step": 216500 - }, - { - "epoch": 1.914018989020315, - "grad_norm": 5.650091171264648, - "learning_rate": 1.8099683516328085e-05, - "loss": 0.6809, - "step": 216510 - }, - { - "epoch": 1.9141073922806273, - "grad_norm": 3.1419782638549805, - "learning_rate": 1.8098210128656213e-05, - "loss": 0.585, - "step": 216520 - }, - { - "epoch": 1.9141957955409397, - "grad_norm": 8.009008407592773, - "learning_rate": 1.8096736740984342e-05, - "loss": 0.6204, - "step": 216530 - }, - { - "epoch": 1.9142841988012518, - "grad_norm": 2.5877749919891357, - "learning_rate": 1.8095263353312474e-05, - "loss": 0.4538, - "step": 216540 - }, - { - "epoch": 1.914372602061564, - "grad_norm": 0.9543381333351135, - "learning_rate": 1.8093789965640602e-05, - "loss": 0.6335, - "step": 216550 - }, - { - "epoch": 1.9144610053218762, - "grad_norm": 3.4716832637786865, - "learning_rate": 1.809231657796873e-05, - "loss": 0.5878, - "step": 216560 - }, - { - "epoch": 1.9145494085821886, - "grad_norm": 0.9770382046699524, - "learning_rate": 1.809084319029686e-05, - "loss": 0.5538, - "step": 216570 - }, - { - "epoch": 1.9146378118425007, - "grad_norm": 4.163053512573242, - "learning_rate": 1.808936980262499e-05, - "loss": 0.6316, - "step": 216580 - }, - { - "epoch": 1.914726215102813, - "grad_norm": 2.317775249481201, - "learning_rate": 1.808789641495312e-05, - "loss": 0.6029, - "step": 216590 - }, - { - "epoch": 1.9148146183631254, - "grad_norm": 1.217799186706543, - "learning_rate": 1.8086423027281247e-05, - "loss": 0.5065, - "step": 216600 - }, - { - "epoch": 1.9149030216234375, - "grad_norm": 2.2359209060668945, - "learning_rate": 1.808494963960938e-05, - "loss": 0.6597, - "step": 216610 - }, - { - "epoch": 1.9149914248837496, - "grad_norm": 2.1642236709594727, - "learning_rate": 1.8083476251937507e-05, - "loss": 0.7001, - "step": 216620 - }, - { - "epoch": 1.915079828144062, - "grad_norm": 1.2121096849441528, - "learning_rate": 1.8082002864265636e-05, - "loss": 0.6169, - "step": 216630 - }, - { - "epoch": 1.9151682314043743, - "grad_norm": 2.152376651763916, - "learning_rate": 1.8080529476593764e-05, - "loss": 0.472, - "step": 216640 - }, - { - "epoch": 1.9152566346646864, - "grad_norm": 1.3939648866653442, - "learning_rate": 1.8079056088921896e-05, - "loss": 0.595, - "step": 216650 - }, - { - "epoch": 1.9153450379249986, - "grad_norm": 1.5826573371887207, - "learning_rate": 1.8077582701250024e-05, - "loss": 0.6674, - "step": 216660 - }, - { - "epoch": 1.915433441185311, - "grad_norm": 1.1853008270263672, - "learning_rate": 1.8076109313578152e-05, - "loss": 0.5838, - "step": 216670 - }, - { - "epoch": 1.9155218444456232, - "grad_norm": 1.9050090312957764, - "learning_rate": 1.807463592590628e-05, - "loss": 0.647, - "step": 216680 - }, - { - "epoch": 1.9156102477059354, - "grad_norm": 4.199453830718994, - "learning_rate": 1.8073162538234412e-05, - "loss": 0.5724, - "step": 216690 - }, - { - "epoch": 1.9156986509662475, - "grad_norm": 1.700758695602417, - "learning_rate": 1.807168915056254e-05, - "loss": 0.5006, - "step": 216700 - }, - { - "epoch": 1.91578705422656, - "grad_norm": 22.516199111938477, - "learning_rate": 1.807021576289067e-05, - "loss": 0.6828, - "step": 216710 - }, - { - "epoch": 1.9158754574868722, - "grad_norm": 1.4677587747573853, - "learning_rate": 1.80687423752188e-05, - "loss": 0.5302, - "step": 216720 - }, - { - "epoch": 1.9159638607471843, - "grad_norm": 2.239807367324829, - "learning_rate": 1.806726898754693e-05, - "loss": 0.7154, - "step": 216730 - }, - { - "epoch": 1.9160522640074966, - "grad_norm": 9.4066743850708, - "learning_rate": 1.8065795599875058e-05, - "loss": 0.6155, - "step": 216740 - }, - { - "epoch": 1.916140667267809, - "grad_norm": 2.0142173767089844, - "learning_rate": 1.8064322212203186e-05, - "loss": 0.5795, - "step": 216750 - }, - { - "epoch": 1.916229070528121, - "grad_norm": 16.098608016967773, - "learning_rate": 1.8062848824531318e-05, - "loss": 0.5251, - "step": 216760 - }, - { - "epoch": 1.9163174737884332, - "grad_norm": 2.477219820022583, - "learning_rate": 1.8061375436859446e-05, - "loss": 0.5682, - "step": 216770 - }, - { - "epoch": 1.9164058770487455, - "grad_norm": 1.6210627555847168, - "learning_rate": 1.8059902049187574e-05, - "loss": 0.539, - "step": 216780 - }, - { - "epoch": 1.9164942803090579, - "grad_norm": 1.6682857275009155, - "learning_rate": 1.8058428661515706e-05, - "loss": 0.582, - "step": 216790 - }, - { - "epoch": 1.91658268356937, - "grad_norm": 1.1472681760787964, - "learning_rate": 1.8056955273843834e-05, - "loss": 0.5368, - "step": 216800 - }, - { - "epoch": 1.9166710868296821, - "grad_norm": 2.036644697189331, - "learning_rate": 1.8055481886171963e-05, - "loss": 0.579, - "step": 216810 - }, - { - "epoch": 1.9167594900899945, - "grad_norm": 6.5936479568481445, - "learning_rate": 1.805400849850009e-05, - "loss": 0.71, - "step": 216820 - }, - { - "epoch": 1.9168478933503068, - "grad_norm": 1.057573914527893, - "learning_rate": 1.8052535110828223e-05, - "loss": 0.5328, - "step": 216830 - }, - { - "epoch": 1.916936296610619, - "grad_norm": 1.4324116706848145, - "learning_rate": 1.805106172315635e-05, - "loss": 0.5921, - "step": 216840 - }, - { - "epoch": 1.9170246998709313, - "grad_norm": 1.5914580821990967, - "learning_rate": 1.804958833548448e-05, - "loss": 0.6378, - "step": 216850 - }, - { - "epoch": 1.9171131031312436, - "grad_norm": 3.1657867431640625, - "learning_rate": 1.8048114947812608e-05, - "loss": 0.5323, - "step": 216860 - }, - { - "epoch": 1.9172015063915557, - "grad_norm": 1.5273853540420532, - "learning_rate": 1.804664156014074e-05, - "loss": 0.4514, - "step": 216870 - }, - { - "epoch": 1.9172899096518679, - "grad_norm": 10.381452560424805, - "learning_rate": 1.8045168172468868e-05, - "loss": 0.5863, - "step": 216880 - }, - { - "epoch": 1.9173783129121802, - "grad_norm": 8.416742324829102, - "learning_rate": 1.8043694784796996e-05, - "loss": 0.5755, - "step": 216890 - }, - { - "epoch": 1.9174667161724925, - "grad_norm": 9.627915382385254, - "learning_rate": 1.8042221397125128e-05, - "loss": 0.6754, - "step": 216900 - }, - { - "epoch": 1.9175551194328047, - "grad_norm": 2.02286696434021, - "learning_rate": 1.8040748009453257e-05, - "loss": 0.565, - "step": 216910 - }, - { - "epoch": 1.9176435226931168, - "grad_norm": 1.7678122520446777, - "learning_rate": 1.8039274621781385e-05, - "loss": 0.4378, - "step": 216920 - }, - { - "epoch": 1.9177319259534291, - "grad_norm": 1.2858494520187378, - "learning_rate": 1.8037801234109513e-05, - "loss": 0.535, - "step": 216930 - }, - { - "epoch": 1.9178203292137415, - "grad_norm": 2.36051344871521, - "learning_rate": 1.8036327846437645e-05, - "loss": 0.4137, - "step": 216940 - }, - { - "epoch": 1.9179087324740536, - "grad_norm": 1.4821425676345825, - "learning_rate": 1.8034854458765773e-05, - "loss": 0.5902, - "step": 216950 - }, - { - "epoch": 1.917997135734366, - "grad_norm": 12.057268142700195, - "learning_rate": 1.8033381071093902e-05, - "loss": 0.5828, - "step": 216960 - }, - { - "epoch": 1.9180855389946783, - "grad_norm": 1.9689851999282837, - "learning_rate": 1.8031907683422033e-05, - "loss": 0.5527, - "step": 216970 - }, - { - "epoch": 1.9181739422549904, - "grad_norm": 1.2902332544326782, - "learning_rate": 1.8030434295750162e-05, - "loss": 0.5786, - "step": 216980 - }, - { - "epoch": 1.9182623455153025, - "grad_norm": 8.02831745147705, - "learning_rate": 1.802896090807829e-05, - "loss": 0.5161, - "step": 216990 - }, - { - "epoch": 1.9183507487756148, - "grad_norm": 2.5328893661499023, - "learning_rate": 1.802748752040642e-05, - "loss": 0.4753, - "step": 217000 - }, - { - "epoch": 1.9184391520359272, - "grad_norm": 1.1034444570541382, - "learning_rate": 1.802601413273455e-05, - "loss": 0.51, - "step": 217010 - }, - { - "epoch": 1.9185275552962393, - "grad_norm": 2.585118055343628, - "learning_rate": 1.802454074506268e-05, - "loss": 0.5873, - "step": 217020 - }, - { - "epoch": 1.9186159585565514, - "grad_norm": 1.4094904661178589, - "learning_rate": 1.8023067357390807e-05, - "loss": 0.5913, - "step": 217030 - }, - { - "epoch": 1.9187043618168638, - "grad_norm": 5.6701741218566895, - "learning_rate": 1.8021593969718935e-05, - "loss": 0.5623, - "step": 217040 - }, - { - "epoch": 1.918792765077176, - "grad_norm": 18.261985778808594, - "learning_rate": 1.8020120582047067e-05, - "loss": 0.5366, - "step": 217050 - }, - { - "epoch": 1.9188811683374882, - "grad_norm": 1.955998182296753, - "learning_rate": 1.8018647194375195e-05, - "loss": 0.5805, - "step": 217060 - }, - { - "epoch": 1.9189695715978006, - "grad_norm": 5.888978958129883, - "learning_rate": 1.8017173806703324e-05, - "loss": 0.5649, - "step": 217070 - }, - { - "epoch": 1.919057974858113, - "grad_norm": 8.626385688781738, - "learning_rate": 1.8015700419031456e-05, - "loss": 0.5281, - "step": 217080 - }, - { - "epoch": 1.919146378118425, - "grad_norm": 3.256411552429199, - "learning_rate": 1.8014227031359584e-05, - "loss": 0.5578, - "step": 217090 - }, - { - "epoch": 1.9192347813787372, - "grad_norm": 1.712684988975525, - "learning_rate": 1.8012753643687712e-05, - "loss": 0.5281, - "step": 217100 - }, - { - "epoch": 1.9193231846390495, - "grad_norm": 5.248591423034668, - "learning_rate": 1.801128025601584e-05, - "loss": 0.4879, - "step": 217110 - }, - { - "epoch": 1.9194115878993618, - "grad_norm": 5.866351127624512, - "learning_rate": 1.8009806868343972e-05, - "loss": 0.6175, - "step": 217120 - }, - { - "epoch": 1.919499991159674, - "grad_norm": 2.3572940826416016, - "learning_rate": 1.80083334806721e-05, - "loss": 0.5237, - "step": 217130 - }, - { - "epoch": 1.919588394419986, - "grad_norm": 2.529583215713501, - "learning_rate": 1.800686009300023e-05, - "loss": 0.5641, - "step": 217140 - }, - { - "epoch": 1.9196767976802984, - "grad_norm": 5.777018070220947, - "learning_rate": 1.8005386705328357e-05, - "loss": 0.5322, - "step": 217150 - }, - { - "epoch": 1.9197652009406108, - "grad_norm": 4.584105968475342, - "learning_rate": 1.800391331765649e-05, - "loss": 0.5333, - "step": 217160 - }, - { - "epoch": 1.9198536042009229, - "grad_norm": 7.387413024902344, - "learning_rate": 1.8002439929984617e-05, - "loss": 0.5365, - "step": 217170 - }, - { - "epoch": 1.9199420074612352, - "grad_norm": 15.710243225097656, - "learning_rate": 1.8000966542312746e-05, - "loss": 0.6798, - "step": 217180 - }, - { - "epoch": 1.9200304107215476, - "grad_norm": 3.0048375129699707, - "learning_rate": 1.7999493154640878e-05, - "loss": 0.4851, - "step": 217190 - }, - { - "epoch": 1.9201188139818597, - "grad_norm": 2.6067099571228027, - "learning_rate": 1.7998019766969006e-05, - "loss": 0.5282, - "step": 217200 - }, - { - "epoch": 1.9202072172421718, - "grad_norm": 5.25256872177124, - "learning_rate": 1.7996546379297134e-05, - "loss": 0.5931, - "step": 217210 - }, - { - "epoch": 1.9202956205024841, - "grad_norm": 3.8436293601989746, - "learning_rate": 1.7995072991625263e-05, - "loss": 0.555, - "step": 217220 - }, - { - "epoch": 1.9203840237627965, - "grad_norm": 2.1344540119171143, - "learning_rate": 1.7993599603953394e-05, - "loss": 0.4882, - "step": 217230 - }, - { - "epoch": 1.9204724270231086, - "grad_norm": 1.0375187397003174, - "learning_rate": 1.7992126216281523e-05, - "loss": 0.5093, - "step": 217240 - }, - { - "epoch": 1.9205608302834207, - "grad_norm": 8.884085655212402, - "learning_rate": 1.799065282860965e-05, - "loss": 0.5275, - "step": 217250 - }, - { - "epoch": 1.920649233543733, - "grad_norm": 2.383563756942749, - "learning_rate": 1.7989179440937783e-05, - "loss": 0.5701, - "step": 217260 - }, - { - "epoch": 1.9207376368040454, - "grad_norm": 1.274013638496399, - "learning_rate": 1.798770605326591e-05, - "loss": 0.5523, - "step": 217270 - }, - { - "epoch": 1.9208260400643575, - "grad_norm": 1.9956133365631104, - "learning_rate": 1.798623266559404e-05, - "loss": 0.7067, - "step": 217280 - }, - { - "epoch": 1.9209144433246697, - "grad_norm": 2.0668630599975586, - "learning_rate": 1.798475927792217e-05, - "loss": 0.6458, - "step": 217290 - }, - { - "epoch": 1.9210028465849822, - "grad_norm": 7.267787933349609, - "learning_rate": 1.79832858902503e-05, - "loss": 0.5819, - "step": 217300 - }, - { - "epoch": 1.9210912498452943, - "grad_norm": 3.4679811000823975, - "learning_rate": 1.7981812502578428e-05, - "loss": 0.6455, - "step": 217310 - }, - { - "epoch": 1.9211796531056065, - "grad_norm": 2.465635061264038, - "learning_rate": 1.798033911490656e-05, - "loss": 0.581, - "step": 217320 - }, - { - "epoch": 1.9212680563659188, - "grad_norm": 2.237391710281372, - "learning_rate": 1.7978865727234688e-05, - "loss": 0.545, - "step": 217330 - }, - { - "epoch": 1.9213564596262311, - "grad_norm": 2.048137903213501, - "learning_rate": 1.7977392339562816e-05, - "loss": 0.5683, - "step": 217340 - }, - { - "epoch": 1.9214448628865433, - "grad_norm": 9.93455982208252, - "learning_rate": 1.7975918951890948e-05, - "loss": 0.6699, - "step": 217350 - }, - { - "epoch": 1.9215332661468554, - "grad_norm": 2.37434458732605, - "learning_rate": 1.7974445564219077e-05, - "loss": 0.5423, - "step": 217360 - }, - { - "epoch": 1.9216216694071677, - "grad_norm": 8.60255241394043, - "learning_rate": 1.7972972176547205e-05, - "loss": 0.5786, - "step": 217370 - }, - { - "epoch": 1.92171007266748, - "grad_norm": 10.527209281921387, - "learning_rate": 1.7971498788875337e-05, - "loss": 0.5217, - "step": 217380 - }, - { - "epoch": 1.9217984759277922, - "grad_norm": 1.200935959815979, - "learning_rate": 1.7970025401203465e-05, - "loss": 0.4956, - "step": 217390 - }, - { - "epoch": 1.9218868791881043, - "grad_norm": 4.9400315284729, - "learning_rate": 1.7968552013531593e-05, - "loss": 0.5778, - "step": 217400 - }, - { - "epoch": 1.9219752824484166, - "grad_norm": 3.8515446186065674, - "learning_rate": 1.7967078625859725e-05, - "loss": 0.6323, - "step": 217410 - }, - { - "epoch": 1.922063685708729, - "grad_norm": 3.5395429134368896, - "learning_rate": 1.7965605238187853e-05, - "loss": 0.5324, - "step": 217420 - }, - { - "epoch": 1.922152088969041, - "grad_norm": 3.494119167327881, - "learning_rate": 1.7964131850515982e-05, - "loss": 0.6355, - "step": 217430 - }, - { - "epoch": 1.9222404922293534, - "grad_norm": 0.7550321817398071, - "learning_rate": 1.7962658462844114e-05, - "loss": 0.506, - "step": 217440 - }, - { - "epoch": 1.9223288954896658, - "grad_norm": 2.566480875015259, - "learning_rate": 1.7961185075172242e-05, - "loss": 0.4371, - "step": 217450 - }, - { - "epoch": 1.922417298749978, - "grad_norm": 4.824710369110107, - "learning_rate": 1.795971168750037e-05, - "loss": 0.5889, - "step": 217460 - }, - { - "epoch": 1.92250570201029, - "grad_norm": 1.0541762113571167, - "learning_rate": 1.79582382998285e-05, - "loss": 0.5156, - "step": 217470 - }, - { - "epoch": 1.9225941052706024, - "grad_norm": 3.598564386367798, - "learning_rate": 1.795676491215663e-05, - "loss": 0.5931, - "step": 217480 - }, - { - "epoch": 1.9226825085309147, - "grad_norm": 2.303901195526123, - "learning_rate": 1.795529152448476e-05, - "loss": 0.5181, - "step": 217490 - }, - { - "epoch": 1.9227709117912268, - "grad_norm": 2.465131998062134, - "learning_rate": 1.7953818136812887e-05, - "loss": 0.6425, - "step": 217500 - }, - { - "epoch": 1.922859315051539, - "grad_norm": 1.9853037595748901, - "learning_rate": 1.7952344749141015e-05, - "loss": 0.5342, - "step": 217510 - }, - { - "epoch": 1.9229477183118513, - "grad_norm": 2.2744500637054443, - "learning_rate": 1.7950871361469147e-05, - "loss": 0.6323, - "step": 217520 - }, - { - "epoch": 1.9230361215721636, - "grad_norm": 4.011896133422852, - "learning_rate": 1.7949397973797275e-05, - "loss": 0.5177, - "step": 217530 - }, - { - "epoch": 1.9231245248324758, - "grad_norm": 1.1928479671478271, - "learning_rate": 1.7947924586125404e-05, - "loss": 0.5628, - "step": 217540 - }, - { - "epoch": 1.923212928092788, - "grad_norm": 3.8024237155914307, - "learning_rate": 1.7946451198453536e-05, - "loss": 0.5659, - "step": 217550 - }, - { - "epoch": 1.9233013313531004, - "grad_norm": 3.6223812103271484, - "learning_rate": 1.7944977810781664e-05, - "loss": 0.5637, - "step": 217560 - }, - { - "epoch": 1.9233897346134126, - "grad_norm": 1.7467900514602661, - "learning_rate": 1.7943504423109792e-05, - "loss": 0.4282, - "step": 217570 - }, - { - "epoch": 1.9234781378737247, - "grad_norm": 1.9814823865890503, - "learning_rate": 1.794203103543792e-05, - "loss": 0.5673, - "step": 217580 - }, - { - "epoch": 1.923566541134037, - "grad_norm": 2.8140225410461426, - "learning_rate": 1.7940557647766052e-05, - "loss": 0.7701, - "step": 217590 - }, - { - "epoch": 1.9236549443943494, - "grad_norm": 2.0217604637145996, - "learning_rate": 1.793908426009418e-05, - "loss": 0.6374, - "step": 217600 - }, - { - "epoch": 1.9237433476546615, - "grad_norm": 2.1382977962493896, - "learning_rate": 1.793761087242231e-05, - "loss": 0.5756, - "step": 217610 - }, - { - "epoch": 1.9238317509149736, - "grad_norm": 2.7940778732299805, - "learning_rate": 1.7936137484750437e-05, - "loss": 0.4617, - "step": 217620 - }, - { - "epoch": 1.923920154175286, - "grad_norm": 2.182830810546875, - "learning_rate": 1.793466409707857e-05, - "loss": 0.5728, - "step": 217630 - }, - { - "epoch": 1.9240085574355983, - "grad_norm": 9.27804946899414, - "learning_rate": 1.7933190709406698e-05, - "loss": 0.6668, - "step": 217640 - }, - { - "epoch": 1.9240969606959104, - "grad_norm": 1.4358670711517334, - "learning_rate": 1.7931717321734826e-05, - "loss": 0.6601, - "step": 217650 - }, - { - "epoch": 1.9241853639562227, - "grad_norm": 1.3430434465408325, - "learning_rate": 1.7930243934062958e-05, - "loss": 0.5591, - "step": 217660 - }, - { - "epoch": 1.924273767216535, - "grad_norm": 2.20162296295166, - "learning_rate": 1.7928770546391086e-05, - "loss": 0.6552, - "step": 217670 - }, - { - "epoch": 1.9243621704768472, - "grad_norm": 6.015159606933594, - "learning_rate": 1.7927297158719214e-05, - "loss": 0.6009, - "step": 217680 - }, - { - "epoch": 1.9244505737371593, - "grad_norm": 1.3070344924926758, - "learning_rate": 1.7925823771047343e-05, - "loss": 0.5133, - "step": 217690 - }, - { - "epoch": 1.9245389769974717, - "grad_norm": 3.3622984886169434, - "learning_rate": 1.7924350383375474e-05, - "loss": 0.6422, - "step": 217700 - }, - { - "epoch": 1.924627380257784, - "grad_norm": 5.397711753845215, - "learning_rate": 1.7922876995703603e-05, - "loss": 0.5324, - "step": 217710 - }, - { - "epoch": 1.9247157835180961, - "grad_norm": 4.792809009552002, - "learning_rate": 1.792140360803173e-05, - "loss": 0.6001, - "step": 217720 - }, - { - "epoch": 1.9248041867784083, - "grad_norm": 2.090374708175659, - "learning_rate": 1.7919930220359863e-05, - "loss": 0.613, - "step": 217730 - }, - { - "epoch": 1.9248925900387206, - "grad_norm": 2.0831427574157715, - "learning_rate": 1.791845683268799e-05, - "loss": 0.5197, - "step": 217740 - }, - { - "epoch": 1.924980993299033, - "grad_norm": 9.81713581085205, - "learning_rate": 1.791698344501612e-05, - "loss": 0.5961, - "step": 217750 - }, - { - "epoch": 1.925069396559345, - "grad_norm": 2.418062925338745, - "learning_rate": 1.7915510057344248e-05, - "loss": 0.5523, - "step": 217760 - }, - { - "epoch": 1.9251577998196574, - "grad_norm": 5.5770392417907715, - "learning_rate": 1.791403666967238e-05, - "loss": 0.5343, - "step": 217770 - }, - { - "epoch": 1.9252462030799697, - "grad_norm": 3.117497444152832, - "learning_rate": 1.7912563282000508e-05, - "loss": 0.7334, - "step": 217780 - }, - { - "epoch": 1.9253346063402819, - "grad_norm": 1.4018239974975586, - "learning_rate": 1.7911089894328636e-05, - "loss": 0.5547, - "step": 217790 - }, - { - "epoch": 1.925423009600594, - "grad_norm": 1.0717302560806274, - "learning_rate": 1.7909616506656765e-05, - "loss": 0.6232, - "step": 217800 - }, - { - "epoch": 1.9255114128609063, - "grad_norm": 3.4991631507873535, - "learning_rate": 1.7908143118984896e-05, - "loss": 0.6773, - "step": 217810 - }, - { - "epoch": 1.9255998161212187, - "grad_norm": 2.2995593547821045, - "learning_rate": 1.7906669731313025e-05, - "loss": 0.6265, - "step": 217820 - }, - { - "epoch": 1.9256882193815308, - "grad_norm": 4.029776096343994, - "learning_rate": 1.7905196343641153e-05, - "loss": 0.482, - "step": 217830 - }, - { - "epoch": 1.925776622641843, - "grad_norm": 4.962882995605469, - "learning_rate": 1.7903722955969285e-05, - "loss": 0.5714, - "step": 217840 - }, - { - "epoch": 1.9258650259021552, - "grad_norm": 2.3954861164093018, - "learning_rate": 1.7902249568297413e-05, - "loss": 0.5717, - "step": 217850 - }, - { - "epoch": 1.9259534291624676, - "grad_norm": 1.405086636543274, - "learning_rate": 1.790077618062554e-05, - "loss": 0.564, - "step": 217860 - }, - { - "epoch": 1.9260418324227797, - "grad_norm": 2.1499006748199463, - "learning_rate": 1.789930279295367e-05, - "loss": 0.5252, - "step": 217870 - }, - { - "epoch": 1.9261302356830918, - "grad_norm": 1.116417646408081, - "learning_rate": 1.7897829405281802e-05, - "loss": 0.5586, - "step": 217880 - }, - { - "epoch": 1.9262186389434044, - "grad_norm": 2.3069539070129395, - "learning_rate": 1.789635601760993e-05, - "loss": 0.5022, - "step": 217890 - }, - { - "epoch": 1.9263070422037165, - "grad_norm": 3.9958276748657227, - "learning_rate": 1.789488262993806e-05, - "loss": 0.5591, - "step": 217900 - }, - { - "epoch": 1.9263954454640286, - "grad_norm": 3.916118860244751, - "learning_rate": 1.789340924226619e-05, - "loss": 0.5871, - "step": 217910 - }, - { - "epoch": 1.926483848724341, - "grad_norm": 1.1679779291152954, - "learning_rate": 1.789193585459432e-05, - "loss": 0.6553, - "step": 217920 - }, - { - "epoch": 1.9265722519846533, - "grad_norm": 3.2080304622650146, - "learning_rate": 1.7890462466922447e-05, - "loss": 0.494, - "step": 217930 - }, - { - "epoch": 1.9266606552449654, - "grad_norm": 1.95858895778656, - "learning_rate": 1.7888989079250575e-05, - "loss": 0.433, - "step": 217940 - }, - { - "epoch": 1.9267490585052776, - "grad_norm": 2.56058931350708, - "learning_rate": 1.7887515691578707e-05, - "loss": 0.658, - "step": 217950 - }, - { - "epoch": 1.92683746176559, - "grad_norm": 1.4632349014282227, - "learning_rate": 1.7886042303906835e-05, - "loss": 0.5436, - "step": 217960 - }, - { - "epoch": 1.9269258650259022, - "grad_norm": 8.628364562988281, - "learning_rate": 1.7884568916234964e-05, - "loss": 0.5751, - "step": 217970 - }, - { - "epoch": 1.9270142682862144, - "grad_norm": 2.563814640045166, - "learning_rate": 1.7883095528563092e-05, - "loss": 0.6268, - "step": 217980 - }, - { - "epoch": 1.9271026715465265, - "grad_norm": 2.3320868015289307, - "learning_rate": 1.7881622140891224e-05, - "loss": 0.5867, - "step": 217990 - }, - { - "epoch": 1.9271910748068388, - "grad_norm": 2.9975430965423584, - "learning_rate": 1.7880148753219352e-05, - "loss": 0.6685, - "step": 218000 - }, - { - "epoch": 1.9272794780671512, - "grad_norm": 5.7594170570373535, - "learning_rate": 1.787867536554748e-05, - "loss": 0.4779, - "step": 218010 - }, - { - "epoch": 1.9273678813274633, - "grad_norm": 4.022233486175537, - "learning_rate": 1.7877201977875612e-05, - "loss": 0.6474, - "step": 218020 - }, - { - "epoch": 1.9274562845877756, - "grad_norm": 1.2713489532470703, - "learning_rate": 1.787572859020374e-05, - "loss": 0.5509, - "step": 218030 - }, - { - "epoch": 1.927544687848088, - "grad_norm": 1.3132531642913818, - "learning_rate": 1.787425520253187e-05, - "loss": 0.5761, - "step": 218040 - }, - { - "epoch": 1.9276330911084, - "grad_norm": 3.1133525371551514, - "learning_rate": 1.7872781814859997e-05, - "loss": 0.586, - "step": 218050 - }, - { - "epoch": 1.9277214943687122, - "grad_norm": 1.876328468322754, - "learning_rate": 1.787130842718813e-05, - "loss": 0.5022, - "step": 218060 - }, - { - "epoch": 1.9278098976290245, - "grad_norm": 2.0243823528289795, - "learning_rate": 1.7869835039516257e-05, - "loss": 0.5588, - "step": 218070 - }, - { - "epoch": 1.927898300889337, - "grad_norm": 3.532790184020996, - "learning_rate": 1.7868361651844386e-05, - "loss": 0.4585, - "step": 218080 - }, - { - "epoch": 1.927986704149649, - "grad_norm": 3.1207637786865234, - "learning_rate": 1.7866888264172514e-05, - "loss": 0.6022, - "step": 218090 - }, - { - "epoch": 1.9280751074099611, - "grad_norm": 10.706979751586914, - "learning_rate": 1.7865414876500646e-05, - "loss": 0.4778, - "step": 218100 - }, - { - "epoch": 1.9281635106702735, - "grad_norm": 3.146976947784424, - "learning_rate": 1.7863941488828774e-05, - "loss": 0.4929, - "step": 218110 - }, - { - "epoch": 1.9282519139305858, - "grad_norm": 1.6596893072128296, - "learning_rate": 1.7862468101156903e-05, - "loss": 0.6311, - "step": 218120 - }, - { - "epoch": 1.928340317190898, - "grad_norm": 3.6359317302703857, - "learning_rate": 1.7860994713485034e-05, - "loss": 0.5591, - "step": 218130 - }, - { - "epoch": 1.9284287204512103, - "grad_norm": 3.193084955215454, - "learning_rate": 1.7859521325813163e-05, - "loss": 0.4278, - "step": 218140 - }, - { - "epoch": 1.9285171237115226, - "grad_norm": 1.1501078605651855, - "learning_rate": 1.785804793814129e-05, - "loss": 0.516, - "step": 218150 - }, - { - "epoch": 1.9286055269718347, - "grad_norm": 2.0530176162719727, - "learning_rate": 1.785657455046942e-05, - "loss": 0.5644, - "step": 218160 - }, - { - "epoch": 1.9286939302321469, - "grad_norm": 2.1958062648773193, - "learning_rate": 1.785510116279755e-05, - "loss": 0.5224, - "step": 218170 - }, - { - "epoch": 1.9287823334924592, - "grad_norm": 8.756962776184082, - "learning_rate": 1.785362777512568e-05, - "loss": 0.5628, - "step": 218180 - }, - { - "epoch": 1.9288707367527715, - "grad_norm": 2.0489983558654785, - "learning_rate": 1.7852154387453808e-05, - "loss": 0.5238, - "step": 218190 - }, - { - "epoch": 1.9289591400130837, - "grad_norm": 3.329068660736084, - "learning_rate": 1.785068099978194e-05, - "loss": 0.5265, - "step": 218200 - }, - { - "epoch": 1.9290475432733958, - "grad_norm": 2.248135566711426, - "learning_rate": 1.7849207612110068e-05, - "loss": 0.5407, - "step": 218210 - }, - { - "epoch": 1.9291359465337081, - "grad_norm": 1.5821142196655273, - "learning_rate": 1.7847734224438196e-05, - "loss": 0.6992, - "step": 218220 - }, - { - "epoch": 1.9292243497940205, - "grad_norm": 3.011342763900757, - "learning_rate": 1.7846260836766328e-05, - "loss": 0.47, - "step": 218230 - }, - { - "epoch": 1.9293127530543326, - "grad_norm": 3.4710450172424316, - "learning_rate": 1.7844787449094456e-05, - "loss": 0.6613, - "step": 218240 - }, - { - "epoch": 1.929401156314645, - "grad_norm": 3.8228461742401123, - "learning_rate": 1.7843314061422585e-05, - "loss": 0.7031, - "step": 218250 - }, - { - "epoch": 1.9294895595749573, - "grad_norm": 2.7415707111358643, - "learning_rate": 1.7841840673750716e-05, - "loss": 0.56, - "step": 218260 - }, - { - "epoch": 1.9295779628352694, - "grad_norm": 1.6685841083526611, - "learning_rate": 1.7840367286078845e-05, - "loss": 0.6108, - "step": 218270 - }, - { - "epoch": 1.9296663660955815, - "grad_norm": 9.429146766662598, - "learning_rate": 1.7838893898406973e-05, - "loss": 0.5643, - "step": 218280 - }, - { - "epoch": 1.9297547693558939, - "grad_norm": 2.2939939498901367, - "learning_rate": 1.7837420510735105e-05, - "loss": 0.5447, - "step": 218290 - }, - { - "epoch": 1.9298431726162062, - "grad_norm": 2.8022780418395996, - "learning_rate": 1.7835947123063233e-05, - "loss": 0.5759, - "step": 218300 - }, - { - "epoch": 1.9299315758765183, - "grad_norm": 3.5208373069763184, - "learning_rate": 1.783447373539136e-05, - "loss": 0.4799, - "step": 218310 - }, - { - "epoch": 1.9300199791368304, - "grad_norm": 1.9998112916946411, - "learning_rate": 1.7833000347719493e-05, - "loss": 0.6462, - "step": 218320 - }, - { - "epoch": 1.9301083823971428, - "grad_norm": 2.380908727645874, - "learning_rate": 1.7831526960047622e-05, - "loss": 0.5386, - "step": 218330 - }, - { - "epoch": 1.9301967856574551, - "grad_norm": 1.969097375869751, - "learning_rate": 1.783005357237575e-05, - "loss": 0.7182, - "step": 218340 - }, - { - "epoch": 1.9302851889177672, - "grad_norm": 9.583781242370605, - "learning_rate": 1.7828580184703882e-05, - "loss": 0.4504, - "step": 218350 - }, - { - "epoch": 1.9303735921780796, - "grad_norm": 14.661343574523926, - "learning_rate": 1.782710679703201e-05, - "loss": 0.4807, - "step": 218360 - }, - { - "epoch": 1.930461995438392, - "grad_norm": 2.680039882659912, - "learning_rate": 1.782563340936014e-05, - "loss": 0.5022, - "step": 218370 - }, - { - "epoch": 1.930550398698704, - "grad_norm": 22.676002502441406, - "learning_rate": 1.782416002168827e-05, - "loss": 0.6186, - "step": 218380 - }, - { - "epoch": 1.9306388019590162, - "grad_norm": 5.862962245941162, - "learning_rate": 1.78226866340164e-05, - "loss": 0.5968, - "step": 218390 - }, - { - "epoch": 1.9307272052193285, - "grad_norm": 3.8336522579193115, - "learning_rate": 1.7821213246344527e-05, - "loss": 0.5934, - "step": 218400 - }, - { - "epoch": 1.9308156084796408, - "grad_norm": 2.620485782623291, - "learning_rate": 1.7819739858672655e-05, - "loss": 0.6129, - "step": 218410 - }, - { - "epoch": 1.930904011739953, - "grad_norm": 3.7110278606414795, - "learning_rate": 1.7818266471000787e-05, - "loss": 0.5808, - "step": 218420 - }, - { - "epoch": 1.930992415000265, - "grad_norm": 1.9464308023452759, - "learning_rate": 1.7816793083328915e-05, - "loss": 0.6752, - "step": 218430 - }, - { - "epoch": 1.9310808182605774, - "grad_norm": 2.341123342514038, - "learning_rate": 1.7815319695657044e-05, - "loss": 0.6382, - "step": 218440 - }, - { - "epoch": 1.9311692215208898, - "grad_norm": 1.8072208166122437, - "learning_rate": 1.7813846307985172e-05, - "loss": 0.5496, - "step": 218450 - }, - { - "epoch": 1.9312576247812019, - "grad_norm": 7.883699893951416, - "learning_rate": 1.7812372920313304e-05, - "loss": 0.6209, - "step": 218460 - }, - { - "epoch": 1.931346028041514, - "grad_norm": 1.207894206047058, - "learning_rate": 1.7810899532641432e-05, - "loss": 0.6467, - "step": 218470 - }, - { - "epoch": 1.9314344313018266, - "grad_norm": 1.264348030090332, - "learning_rate": 1.780942614496956e-05, - "loss": 0.5884, - "step": 218480 - }, - { - "epoch": 1.9315228345621387, - "grad_norm": 2.026008367538452, - "learning_rate": 1.7807952757297692e-05, - "loss": 0.6138, - "step": 218490 - }, - { - "epoch": 1.9316112378224508, - "grad_norm": 1.5705889463424683, - "learning_rate": 1.780647936962582e-05, - "loss": 0.4809, - "step": 218500 - }, - { - "epoch": 1.9316996410827632, - "grad_norm": 4.483217239379883, - "learning_rate": 1.780500598195395e-05, - "loss": 0.5974, - "step": 218510 - }, - { - "epoch": 1.9317880443430755, - "grad_norm": 2.591930389404297, - "learning_rate": 1.7803532594282077e-05, - "loss": 0.6075, - "step": 218520 - }, - { - "epoch": 1.9318764476033876, - "grad_norm": 8.742344856262207, - "learning_rate": 1.780205920661021e-05, - "loss": 0.5389, - "step": 218530 - }, - { - "epoch": 1.9319648508636997, - "grad_norm": 1.6099954843521118, - "learning_rate": 1.7800585818938337e-05, - "loss": 0.5678, - "step": 218540 - }, - { - "epoch": 1.932053254124012, - "grad_norm": 2.3716413974761963, - "learning_rate": 1.7799112431266466e-05, - "loss": 0.4723, - "step": 218550 - }, - { - "epoch": 1.9321416573843244, - "grad_norm": 1.2897536754608154, - "learning_rate": 1.7797639043594598e-05, - "loss": 0.4575, - "step": 218560 - }, - { - "epoch": 1.9322300606446365, - "grad_norm": 8.33523941040039, - "learning_rate": 1.7796165655922726e-05, - "loss": 0.5717, - "step": 218570 - }, - { - "epoch": 1.9323184639049487, - "grad_norm": 9.135923385620117, - "learning_rate": 1.7794692268250854e-05, - "loss": 0.6403, - "step": 218580 - }, - { - "epoch": 1.932406867165261, - "grad_norm": 5.0192131996154785, - "learning_rate": 1.7793218880578983e-05, - "loss": 0.5846, - "step": 218590 - }, - { - "epoch": 1.9324952704255733, - "grad_norm": 1.7597485780715942, - "learning_rate": 1.7791745492907114e-05, - "loss": 0.4968, - "step": 218600 - }, - { - "epoch": 1.9325836736858855, - "grad_norm": 7.343288421630859, - "learning_rate": 1.7790272105235243e-05, - "loss": 0.5158, - "step": 218610 - }, - { - "epoch": 1.9326720769461978, - "grad_norm": 2.0759341716766357, - "learning_rate": 1.778879871756337e-05, - "loss": 0.5679, - "step": 218620 - }, - { - "epoch": 1.9327604802065101, - "grad_norm": 4.260840892791748, - "learning_rate": 1.77873253298915e-05, - "loss": 0.5032, - "step": 218630 - }, - { - "epoch": 1.9328488834668223, - "grad_norm": 2.3392951488494873, - "learning_rate": 1.778585194221963e-05, - "loss": 0.4972, - "step": 218640 - }, - { - "epoch": 1.9329372867271344, - "grad_norm": 2.7006070613861084, - "learning_rate": 1.778437855454776e-05, - "loss": 0.6097, - "step": 218650 - }, - { - "epoch": 1.9330256899874467, - "grad_norm": 2.1393356323242188, - "learning_rate": 1.7782905166875888e-05, - "loss": 0.6489, - "step": 218660 - }, - { - "epoch": 1.933114093247759, - "grad_norm": 2.202934741973877, - "learning_rate": 1.778143177920402e-05, - "loss": 0.5765, - "step": 218670 - }, - { - "epoch": 1.9332024965080712, - "grad_norm": 5.654677867889404, - "learning_rate": 1.7779958391532148e-05, - "loss": 0.6287, - "step": 218680 - }, - { - "epoch": 1.9332908997683833, - "grad_norm": 2.843629837036133, - "learning_rate": 1.7778485003860276e-05, - "loss": 0.6147, - "step": 218690 - }, - { - "epoch": 1.9333793030286957, - "grad_norm": 5.8555192947387695, - "learning_rate": 1.7777011616188405e-05, - "loss": 0.6377, - "step": 218700 - }, - { - "epoch": 1.933467706289008, - "grad_norm": 6.428351879119873, - "learning_rate": 1.7775538228516536e-05, - "loss": 0.59, - "step": 218710 - }, - { - "epoch": 1.9335561095493201, - "grad_norm": 1.2108049392700195, - "learning_rate": 1.7774064840844665e-05, - "loss": 0.601, - "step": 218720 - }, - { - "epoch": 1.9336445128096325, - "grad_norm": 24.356868743896484, - "learning_rate": 1.7772591453172793e-05, - "loss": 0.4592, - "step": 218730 - }, - { - "epoch": 1.9337329160699448, - "grad_norm": 1.771763801574707, - "learning_rate": 1.777111806550092e-05, - "loss": 0.5184, - "step": 218740 - }, - { - "epoch": 1.933821319330257, - "grad_norm": 2.1380646228790283, - "learning_rate": 1.7769644677829053e-05, - "loss": 0.5084, - "step": 218750 - }, - { - "epoch": 1.933909722590569, - "grad_norm": 1.9356966018676758, - "learning_rate": 1.776817129015718e-05, - "loss": 0.5513, - "step": 218760 - }, - { - "epoch": 1.9339981258508814, - "grad_norm": 1.8511749505996704, - "learning_rate": 1.776669790248531e-05, - "loss": 0.5839, - "step": 218770 - }, - { - "epoch": 1.9340865291111937, - "grad_norm": 1.9369524717330933, - "learning_rate": 1.776522451481344e-05, - "loss": 0.6438, - "step": 218780 - }, - { - "epoch": 1.9341749323715058, - "grad_norm": 7.3883466720581055, - "learning_rate": 1.776375112714157e-05, - "loss": 0.6322, - "step": 218790 - }, - { - "epoch": 1.934263335631818, - "grad_norm": 6.247851371765137, - "learning_rate": 1.77622777394697e-05, - "loss": 0.5515, - "step": 218800 - }, - { - "epoch": 1.9343517388921303, - "grad_norm": 2.7822794914245605, - "learning_rate": 1.7760804351797827e-05, - "loss": 0.5161, - "step": 218810 - }, - { - "epoch": 1.9344401421524426, - "grad_norm": 3.816363573074341, - "learning_rate": 1.775933096412596e-05, - "loss": 0.5905, - "step": 218820 - }, - { - "epoch": 1.9345285454127548, - "grad_norm": 2.320831537246704, - "learning_rate": 1.7757857576454087e-05, - "loss": 0.5651, - "step": 218830 - }, - { - "epoch": 1.934616948673067, - "grad_norm": 2.0087990760803223, - "learning_rate": 1.7756384188782215e-05, - "loss": 0.6182, - "step": 218840 - }, - { - "epoch": 1.9347053519333794, - "grad_norm": 1.3577136993408203, - "learning_rate": 1.7754910801110347e-05, - "loss": 0.5823, - "step": 218850 - }, - { - "epoch": 1.9347937551936916, - "grad_norm": 6.090202331542969, - "learning_rate": 1.7753437413438475e-05, - "loss": 0.5309, - "step": 218860 - }, - { - "epoch": 1.9348821584540037, - "grad_norm": 2.085124969482422, - "learning_rate": 1.7751964025766604e-05, - "loss": 0.5835, - "step": 218870 - }, - { - "epoch": 1.934970561714316, - "grad_norm": 1.6566983461380005, - "learning_rate": 1.7750490638094732e-05, - "loss": 0.5985, - "step": 218880 - }, - { - "epoch": 1.9350589649746284, - "grad_norm": 2.8751561641693115, - "learning_rate": 1.7749017250422864e-05, - "loss": 0.7127, - "step": 218890 - }, - { - "epoch": 1.9351473682349405, - "grad_norm": 2.1735904216766357, - "learning_rate": 1.7747543862750992e-05, - "loss": 0.5764, - "step": 218900 - }, - { - "epoch": 1.9352357714952526, - "grad_norm": 1.6033087968826294, - "learning_rate": 1.774607047507912e-05, - "loss": 0.56, - "step": 218910 - }, - { - "epoch": 1.935324174755565, - "grad_norm": 1.7736544609069824, - "learning_rate": 1.774459708740725e-05, - "loss": 0.6301, - "step": 218920 - }, - { - "epoch": 1.9354125780158773, - "grad_norm": 5.374022006988525, - "learning_rate": 1.774312369973538e-05, - "loss": 0.6536, - "step": 218930 - }, - { - "epoch": 1.9355009812761894, - "grad_norm": 5.016340732574463, - "learning_rate": 1.774165031206351e-05, - "loss": 0.693, - "step": 218940 - }, - { - "epoch": 1.9355893845365018, - "grad_norm": 2.5083281993865967, - "learning_rate": 1.7740176924391637e-05, - "loss": 0.4785, - "step": 218950 - }, - { - "epoch": 1.935677787796814, - "grad_norm": 1.362806797027588, - "learning_rate": 1.773870353671977e-05, - "loss": 0.5337, - "step": 218960 - }, - { - "epoch": 1.9357661910571262, - "grad_norm": 5.3241376876831055, - "learning_rate": 1.7737230149047897e-05, - "loss": 0.6107, - "step": 218970 - }, - { - "epoch": 1.9358545943174383, - "grad_norm": 5.739940166473389, - "learning_rate": 1.7735756761376026e-05, - "loss": 0.7272, - "step": 218980 - }, - { - "epoch": 1.9359429975777507, - "grad_norm": 5.452607154846191, - "learning_rate": 1.7734283373704154e-05, - "loss": 0.5908, - "step": 218990 - }, - { - "epoch": 1.936031400838063, - "grad_norm": 0.8039748072624207, - "learning_rate": 1.7732809986032286e-05, - "loss": 0.5703, - "step": 219000 - }, - { - "epoch": 1.9361198040983751, - "grad_norm": 1.631017804145813, - "learning_rate": 1.7731336598360414e-05, - "loss": 0.5345, - "step": 219010 - }, - { - "epoch": 1.9362082073586873, - "grad_norm": 5.559750556945801, - "learning_rate": 1.7729863210688543e-05, - "loss": 0.5953, - "step": 219020 - }, - { - "epoch": 1.9362966106189996, - "grad_norm": 1.3706836700439453, - "learning_rate": 1.7728389823016674e-05, - "loss": 0.6115, - "step": 219030 - }, - { - "epoch": 1.936385013879312, - "grad_norm": 6.054581642150879, - "learning_rate": 1.7726916435344803e-05, - "loss": 0.4715, - "step": 219040 - }, - { - "epoch": 1.936473417139624, - "grad_norm": 10.774863243103027, - "learning_rate": 1.772544304767293e-05, - "loss": 0.5946, - "step": 219050 - }, - { - "epoch": 1.9365618203999362, - "grad_norm": 3.723139524459839, - "learning_rate": 1.772396966000106e-05, - "loss": 0.7378, - "step": 219060 - }, - { - "epoch": 1.9366502236602487, - "grad_norm": 1.4909392595291138, - "learning_rate": 1.772249627232919e-05, - "loss": 0.6206, - "step": 219070 - }, - { - "epoch": 1.9367386269205609, - "grad_norm": 1.3338351249694824, - "learning_rate": 1.772102288465732e-05, - "loss": 0.5949, - "step": 219080 - }, - { - "epoch": 1.936827030180873, - "grad_norm": 1.9898263216018677, - "learning_rate": 1.7719549496985448e-05, - "loss": 0.4866, - "step": 219090 - }, - { - "epoch": 1.9369154334411853, - "grad_norm": 5.609694004058838, - "learning_rate": 1.7718076109313576e-05, - "loss": 0.583, - "step": 219100 - }, - { - "epoch": 1.9370038367014977, - "grad_norm": 4.765753269195557, - "learning_rate": 1.7716602721641708e-05, - "loss": 0.6616, - "step": 219110 - }, - { - "epoch": 1.9370922399618098, - "grad_norm": 4.974100112915039, - "learning_rate": 1.7715129333969836e-05, - "loss": 0.4614, - "step": 219120 - }, - { - "epoch": 1.937180643222122, - "grad_norm": 1.5515565872192383, - "learning_rate": 1.7713655946297965e-05, - "loss": 0.5789, - "step": 219130 - }, - { - "epoch": 1.9372690464824343, - "grad_norm": 6.178471565246582, - "learning_rate": 1.7712182558626096e-05, - "loss": 0.5698, - "step": 219140 - }, - { - "epoch": 1.9373574497427466, - "grad_norm": 6.909763336181641, - "learning_rate": 1.7710709170954225e-05, - "loss": 0.6136, - "step": 219150 - }, - { - "epoch": 1.9374458530030587, - "grad_norm": 1.6127444505691528, - "learning_rate": 1.7709235783282353e-05, - "loss": 0.5734, - "step": 219160 - }, - { - "epoch": 1.9375342562633708, - "grad_norm": 2.046926736831665, - "learning_rate": 1.7707762395610485e-05, - "loss": 0.5165, - "step": 219170 - }, - { - "epoch": 1.9376226595236834, - "grad_norm": 4.1073527336120605, - "learning_rate": 1.7706289007938613e-05, - "loss": 0.581, - "step": 219180 - }, - { - "epoch": 1.9377110627839955, - "grad_norm": 1.3950103521347046, - "learning_rate": 1.770481562026674e-05, - "loss": 0.6281, - "step": 219190 - }, - { - "epoch": 1.9377994660443076, - "grad_norm": 1.0525743961334229, - "learning_rate": 1.7703342232594873e-05, - "loss": 0.532, - "step": 219200 - }, - { - "epoch": 1.93788786930462, - "grad_norm": 0.8216108083724976, - "learning_rate": 1.7701868844923e-05, - "loss": 0.6186, - "step": 219210 - }, - { - "epoch": 1.9379762725649323, - "grad_norm": 7.460700035095215, - "learning_rate": 1.770039545725113e-05, - "loss": 0.5772, - "step": 219220 - }, - { - "epoch": 1.9380646758252444, - "grad_norm": 2.1443541049957275, - "learning_rate": 1.769892206957926e-05, - "loss": 0.5322, - "step": 219230 - }, - { - "epoch": 1.9381530790855566, - "grad_norm": 0.920703649520874, - "learning_rate": 1.769744868190739e-05, - "loss": 0.6011, - "step": 219240 - }, - { - "epoch": 1.938241482345869, - "grad_norm": 1.9932037591934204, - "learning_rate": 1.769597529423552e-05, - "loss": 0.6048, - "step": 219250 - }, - { - "epoch": 1.9383298856061812, - "grad_norm": 6.591262340545654, - "learning_rate": 1.769450190656365e-05, - "loss": 0.5356, - "step": 219260 - }, - { - "epoch": 1.9384182888664934, - "grad_norm": 4.121075630187988, - "learning_rate": 1.769302851889178e-05, - "loss": 0.5259, - "step": 219270 - }, - { - "epoch": 1.9385066921268055, - "grad_norm": 6.472079753875732, - "learning_rate": 1.7691555131219907e-05, - "loss": 0.625, - "step": 219280 - }, - { - "epoch": 1.9385950953871178, - "grad_norm": 2.06003999710083, - "learning_rate": 1.769008174354804e-05, - "loss": 0.5802, - "step": 219290 - }, - { - "epoch": 1.9386834986474302, - "grad_norm": 3.8098337650299072, - "learning_rate": 1.7688608355876167e-05, - "loss": 0.6639, - "step": 219300 - }, - { - "epoch": 1.9387719019077423, - "grad_norm": 2.501352548599243, - "learning_rate": 1.7687134968204295e-05, - "loss": 0.5246, - "step": 219310 - }, - { - "epoch": 1.9388603051680546, - "grad_norm": 5.230441093444824, - "learning_rate": 1.7685661580532427e-05, - "loss": 0.5643, - "step": 219320 - }, - { - "epoch": 1.938948708428367, - "grad_norm": 1.5599844455718994, - "learning_rate": 1.7684188192860555e-05, - "loss": 0.6277, - "step": 219330 - }, - { - "epoch": 1.939037111688679, - "grad_norm": 7.174968242645264, - "learning_rate": 1.7682714805188684e-05, - "loss": 0.5319, - "step": 219340 - }, - { - "epoch": 1.9391255149489912, - "grad_norm": 3.6672191619873047, - "learning_rate": 1.7681241417516812e-05, - "loss": 0.5435, - "step": 219350 - }, - { - "epoch": 1.9392139182093036, - "grad_norm": 6.531252861022949, - "learning_rate": 1.7679768029844944e-05, - "loss": 0.4753, - "step": 219360 - }, - { - "epoch": 1.939302321469616, - "grad_norm": 5.337547302246094, - "learning_rate": 1.7678294642173072e-05, - "loss": 0.6496, - "step": 219370 - }, - { - "epoch": 1.939390724729928, - "grad_norm": 1.5465481281280518, - "learning_rate": 1.76768212545012e-05, - "loss": 0.5702, - "step": 219380 - }, - { - "epoch": 1.9394791279902401, - "grad_norm": 4.0540642738342285, - "learning_rate": 1.767534786682933e-05, - "loss": 0.566, - "step": 219390 - }, - { - "epoch": 1.9395675312505525, - "grad_norm": 7.075825214385986, - "learning_rate": 1.767387447915746e-05, - "loss": 0.501, - "step": 219400 - }, - { - "epoch": 1.9396559345108648, - "grad_norm": 10.447091102600098, - "learning_rate": 1.767240109148559e-05, - "loss": 0.5543, - "step": 219410 - }, - { - "epoch": 1.939744337771177, - "grad_norm": 2.6997909545898438, - "learning_rate": 1.7670927703813717e-05, - "loss": 0.5825, - "step": 219420 - }, - { - "epoch": 1.9398327410314893, - "grad_norm": 2.0073082447052, - "learning_rate": 1.766945431614185e-05, - "loss": 0.5996, - "step": 219430 - }, - { - "epoch": 1.9399211442918016, - "grad_norm": 1.1852062940597534, - "learning_rate": 1.7667980928469977e-05, - "loss": 0.7153, - "step": 219440 - }, - { - "epoch": 1.9400095475521137, - "grad_norm": 1.176051378250122, - "learning_rate": 1.7666507540798106e-05, - "loss": 0.4779, - "step": 219450 - }, - { - "epoch": 1.9400979508124259, - "grad_norm": 1.9624440670013428, - "learning_rate": 1.7665034153126234e-05, - "loss": 0.4672, - "step": 219460 - }, - { - "epoch": 1.9401863540727382, - "grad_norm": 3.0061705112457275, - "learning_rate": 1.7663560765454366e-05, - "loss": 0.5718, - "step": 219470 - }, - { - "epoch": 1.9402747573330505, - "grad_norm": 3.341076612472534, - "learning_rate": 1.7662087377782494e-05, - "loss": 0.7723, - "step": 219480 - }, - { - "epoch": 1.9403631605933627, - "grad_norm": 8.552477836608887, - "learning_rate": 1.7660613990110623e-05, - "loss": 0.5439, - "step": 219490 - }, - { - "epoch": 1.9404515638536748, - "grad_norm": 12.897489547729492, - "learning_rate": 1.7659140602438754e-05, - "loss": 0.5182, - "step": 219500 - }, - { - "epoch": 1.9405399671139871, - "grad_norm": 33.714229583740234, - "learning_rate": 1.7657667214766883e-05, - "loss": 0.6536, - "step": 219510 - }, - { - "epoch": 1.9406283703742995, - "grad_norm": 1.814167857170105, - "learning_rate": 1.765619382709501e-05, - "loss": 0.5305, - "step": 219520 - }, - { - "epoch": 1.9407167736346116, - "grad_norm": 0.6747092604637146, - "learning_rate": 1.765472043942314e-05, - "loss": 0.5619, - "step": 219530 - }, - { - "epoch": 1.940805176894924, - "grad_norm": 3.3053078651428223, - "learning_rate": 1.765324705175127e-05, - "loss": 0.6962, - "step": 219540 - }, - { - "epoch": 1.9408935801552363, - "grad_norm": 1.7575353384017944, - "learning_rate": 1.76517736640794e-05, - "loss": 0.5206, - "step": 219550 - }, - { - "epoch": 1.9409819834155484, - "grad_norm": 3.9731557369232178, - "learning_rate": 1.7650300276407528e-05, - "loss": 0.6002, - "step": 219560 - }, - { - "epoch": 1.9410703866758605, - "grad_norm": 6.252354145050049, - "learning_rate": 1.7648826888735656e-05, - "loss": 0.5617, - "step": 219570 - }, - { - "epoch": 1.9411587899361729, - "grad_norm": 1.7864651679992676, - "learning_rate": 1.7647353501063788e-05, - "loss": 0.5981, - "step": 219580 - }, - { - "epoch": 1.9412471931964852, - "grad_norm": 0.9924711585044861, - "learning_rate": 1.7645880113391916e-05, - "loss": 0.7034, - "step": 219590 - }, - { - "epoch": 1.9413355964567973, - "grad_norm": 0.935824990272522, - "learning_rate": 1.7644406725720045e-05, - "loss": 0.5799, - "step": 219600 - }, - { - "epoch": 1.9414239997171094, - "grad_norm": 0.8380221724510193, - "learning_rate": 1.7642933338048176e-05, - "loss": 0.5999, - "step": 219610 - }, - { - "epoch": 1.9415124029774218, - "grad_norm": 5.587125778198242, - "learning_rate": 1.7641459950376305e-05, - "loss": 0.5707, - "step": 219620 - }, - { - "epoch": 1.9416008062377341, - "grad_norm": 2.5349559783935547, - "learning_rate": 1.7639986562704433e-05, - "loss": 0.5103, - "step": 219630 - }, - { - "epoch": 1.9416892094980462, - "grad_norm": 3.3946216106414795, - "learning_rate": 1.763851317503256e-05, - "loss": 0.644, - "step": 219640 - }, - { - "epoch": 1.9417776127583584, - "grad_norm": 1.6937671899795532, - "learning_rate": 1.7637039787360693e-05, - "loss": 0.6548, - "step": 219650 - }, - { - "epoch": 1.941866016018671, - "grad_norm": 10.187103271484375, - "learning_rate": 1.763556639968882e-05, - "loss": 0.5445, - "step": 219660 - }, - { - "epoch": 1.941954419278983, - "grad_norm": 3.1246190071105957, - "learning_rate": 1.763409301201695e-05, - "loss": 0.5554, - "step": 219670 - }, - { - "epoch": 1.9420428225392952, - "grad_norm": 1.1979979276657104, - "learning_rate": 1.763261962434508e-05, - "loss": 0.6211, - "step": 219680 - }, - { - "epoch": 1.9421312257996075, - "grad_norm": 2.406420946121216, - "learning_rate": 1.763114623667321e-05, - "loss": 0.5294, - "step": 219690 - }, - { - "epoch": 1.9422196290599198, - "grad_norm": 5.116988658905029, - "learning_rate": 1.762967284900134e-05, - "loss": 0.7088, - "step": 219700 - }, - { - "epoch": 1.942308032320232, - "grad_norm": 4.389537811279297, - "learning_rate": 1.7628199461329467e-05, - "loss": 0.4937, - "step": 219710 - }, - { - "epoch": 1.942396435580544, - "grad_norm": 1.7930338382720947, - "learning_rate": 1.76267260736576e-05, - "loss": 0.5966, - "step": 219720 - }, - { - "epoch": 1.9424848388408564, - "grad_norm": 3.2008001804351807, - "learning_rate": 1.7625252685985727e-05, - "loss": 0.5694, - "step": 219730 - }, - { - "epoch": 1.9425732421011688, - "grad_norm": 0.765235960483551, - "learning_rate": 1.7623779298313855e-05, - "loss": 0.6581, - "step": 219740 - }, - { - "epoch": 1.942661645361481, - "grad_norm": 1.841051697731018, - "learning_rate": 1.7622305910641984e-05, - "loss": 0.5654, - "step": 219750 - }, - { - "epoch": 1.942750048621793, - "grad_norm": 3.4378368854522705, - "learning_rate": 1.7620832522970115e-05, - "loss": 0.6789, - "step": 219760 - }, - { - "epoch": 1.9428384518821056, - "grad_norm": 4.095025539398193, - "learning_rate": 1.7619359135298244e-05, - "loss": 0.6061, - "step": 219770 - }, - { - "epoch": 1.9429268551424177, - "grad_norm": 2.454801082611084, - "learning_rate": 1.7617885747626372e-05, - "loss": 0.5717, - "step": 219780 - }, - { - "epoch": 1.9430152584027298, - "grad_norm": 1.8583295345306396, - "learning_rate": 1.7616412359954504e-05, - "loss": 0.6113, - "step": 219790 - }, - { - "epoch": 1.9431036616630422, - "grad_norm": 2.6790244579315186, - "learning_rate": 1.7614938972282632e-05, - "loss": 0.6239, - "step": 219800 - }, - { - "epoch": 1.9431920649233545, - "grad_norm": 3.1267857551574707, - "learning_rate": 1.761346558461076e-05, - "loss": 0.5703, - "step": 219810 - }, - { - "epoch": 1.9432804681836666, - "grad_norm": 3.0117502212524414, - "learning_rate": 1.761199219693889e-05, - "loss": 0.5695, - "step": 219820 - }, - { - "epoch": 1.9433688714439787, - "grad_norm": 3.4411027431488037, - "learning_rate": 1.761051880926702e-05, - "loss": 0.4603, - "step": 219830 - }, - { - "epoch": 1.943457274704291, - "grad_norm": 2.077807664871216, - "learning_rate": 1.760904542159515e-05, - "loss": 0.6571, - "step": 219840 - }, - { - "epoch": 1.9435456779646034, - "grad_norm": 1.2126491069793701, - "learning_rate": 1.7607572033923277e-05, - "loss": 0.5591, - "step": 219850 - }, - { - "epoch": 1.9436340812249155, - "grad_norm": 9.818131446838379, - "learning_rate": 1.7606098646251406e-05, - "loss": 0.5448, - "step": 219860 - }, - { - "epoch": 1.9437224844852277, - "grad_norm": 15.830206871032715, - "learning_rate": 1.7604625258579537e-05, - "loss": 0.5582, - "step": 219870 - }, - { - "epoch": 1.94381088774554, - "grad_norm": 2.1060850620269775, - "learning_rate": 1.7603151870907666e-05, - "loss": 0.6976, - "step": 219880 - }, - { - "epoch": 1.9438992910058523, - "grad_norm": 4.5368828773498535, - "learning_rate": 1.7601678483235794e-05, - "loss": 0.661, - "step": 219890 - }, - { - "epoch": 1.9439876942661645, - "grad_norm": 3.4462361335754395, - "learning_rate": 1.7600205095563926e-05, - "loss": 0.5099, - "step": 219900 - }, - { - "epoch": 1.9440760975264768, - "grad_norm": 1.9157859086990356, - "learning_rate": 1.7598731707892054e-05, - "loss": 0.5198, - "step": 219910 - }, - { - "epoch": 1.9441645007867892, - "grad_norm": 1.4814224243164062, - "learning_rate": 1.7597258320220182e-05, - "loss": 0.5405, - "step": 219920 - }, - { - "epoch": 1.9442529040471013, - "grad_norm": 1.172135353088379, - "learning_rate": 1.759578493254831e-05, - "loss": 0.5591, - "step": 219930 - }, - { - "epoch": 1.9443413073074134, - "grad_norm": 4.656274318695068, - "learning_rate": 1.7594311544876443e-05, - "loss": 0.7209, - "step": 219940 - }, - { - "epoch": 1.9444297105677257, - "grad_norm": 2.0200414657592773, - "learning_rate": 1.759283815720457e-05, - "loss": 0.6001, - "step": 219950 - }, - { - "epoch": 1.944518113828038, - "grad_norm": 12.98513412475586, - "learning_rate": 1.75913647695327e-05, - "loss": 0.532, - "step": 219960 - }, - { - "epoch": 1.9446065170883502, - "grad_norm": 1.2624541521072388, - "learning_rate": 1.758989138186083e-05, - "loss": 0.4632, - "step": 219970 - }, - { - "epoch": 1.9446949203486623, - "grad_norm": 2.138148307800293, - "learning_rate": 1.758841799418896e-05, - "loss": 0.6467, - "step": 219980 - }, - { - "epoch": 1.9447833236089747, - "grad_norm": 7.881981372833252, - "learning_rate": 1.7586944606517088e-05, - "loss": 0.5565, - "step": 219990 - }, - { - "epoch": 1.944871726869287, - "grad_norm": 2.5142242908477783, - "learning_rate": 1.7585471218845216e-05, - "loss": 0.615, - "step": 220000 - }, - { - "epoch": 1.9449601301295991, - "grad_norm": 1.5457180738449097, - "learning_rate": 1.7583997831173348e-05, - "loss": 0.5461, - "step": 220010 - }, - { - "epoch": 1.9450485333899115, - "grad_norm": 3.353332757949829, - "learning_rate": 1.7582524443501476e-05, - "loss": 0.5181, - "step": 220020 - }, - { - "epoch": 1.9451369366502238, - "grad_norm": 32.87668991088867, - "learning_rate": 1.7581051055829605e-05, - "loss": 0.627, - "step": 220030 - }, - { - "epoch": 1.945225339910536, - "grad_norm": 2.2376997470855713, - "learning_rate": 1.7579577668157733e-05, - "loss": 0.6101, - "step": 220040 - }, - { - "epoch": 1.945313743170848, - "grad_norm": 4.235456943511963, - "learning_rate": 1.7578104280485865e-05, - "loss": 0.4517, - "step": 220050 - }, - { - "epoch": 1.9454021464311604, - "grad_norm": 2.897719383239746, - "learning_rate": 1.7576630892813993e-05, - "loss": 0.529, - "step": 220060 - }, - { - "epoch": 1.9454905496914727, - "grad_norm": 3.140842914581299, - "learning_rate": 1.757515750514212e-05, - "loss": 0.7252, - "step": 220070 - }, - { - "epoch": 1.9455789529517848, - "grad_norm": 4.630125045776367, - "learning_rate": 1.7573684117470253e-05, - "loss": 0.6496, - "step": 220080 - }, - { - "epoch": 1.945667356212097, - "grad_norm": 2.1478161811828613, - "learning_rate": 1.757221072979838e-05, - "loss": 0.4344, - "step": 220090 - }, - { - "epoch": 1.9457557594724093, - "grad_norm": 2.4617857933044434, - "learning_rate": 1.757073734212651e-05, - "loss": 0.7266, - "step": 220100 - }, - { - "epoch": 1.9458441627327216, - "grad_norm": 3.54447603225708, - "learning_rate": 1.756926395445464e-05, - "loss": 0.6597, - "step": 220110 - }, - { - "epoch": 1.9459325659930338, - "grad_norm": 1.2158421277999878, - "learning_rate": 1.756779056678277e-05, - "loss": 0.4788, - "step": 220120 - }, - { - "epoch": 1.9460209692533461, - "grad_norm": 12.480134010314941, - "learning_rate": 1.7566317179110898e-05, - "loss": 0.6888, - "step": 220130 - }, - { - "epoch": 1.9461093725136585, - "grad_norm": 3.005518674850464, - "learning_rate": 1.756484379143903e-05, - "loss": 0.5886, - "step": 220140 - }, - { - "epoch": 1.9461977757739706, - "grad_norm": 4.216115474700928, - "learning_rate": 1.756337040376716e-05, - "loss": 0.7235, - "step": 220150 - }, - { - "epoch": 1.9462861790342827, - "grad_norm": 2.8140199184417725, - "learning_rate": 1.7561897016095287e-05, - "loss": 0.6644, - "step": 220160 - }, - { - "epoch": 1.946374582294595, - "grad_norm": 6.192380428314209, - "learning_rate": 1.756042362842342e-05, - "loss": 0.4998, - "step": 220170 - }, - { - "epoch": 1.9464629855549074, - "grad_norm": 5.766744613647461, - "learning_rate": 1.7558950240751547e-05, - "loss": 0.6183, - "step": 220180 - }, - { - "epoch": 1.9465513888152195, - "grad_norm": 5.080082416534424, - "learning_rate": 1.755747685307968e-05, - "loss": 0.6872, - "step": 220190 - }, - { - "epoch": 1.9466397920755316, - "grad_norm": 2.4956791400909424, - "learning_rate": 1.7556003465407807e-05, - "loss": 0.5551, - "step": 220200 - }, - { - "epoch": 1.946728195335844, - "grad_norm": 3.850264310836792, - "learning_rate": 1.7554530077735935e-05, - "loss": 0.685, - "step": 220210 - }, - { - "epoch": 1.9468165985961563, - "grad_norm": 1.7307443618774414, - "learning_rate": 1.7553056690064064e-05, - "loss": 0.7235, - "step": 220220 - }, - { - "epoch": 1.9469050018564684, - "grad_norm": 2.543159008026123, - "learning_rate": 1.7551583302392195e-05, - "loss": 0.6074, - "step": 220230 - }, - { - "epoch": 1.9469934051167808, - "grad_norm": 2.0163302421569824, - "learning_rate": 1.7550109914720324e-05, - "loss": 0.6399, - "step": 220240 - }, - { - "epoch": 1.947081808377093, - "grad_norm": 3.360020399093628, - "learning_rate": 1.7548636527048452e-05, - "loss": 0.5777, - "step": 220250 - }, - { - "epoch": 1.9471702116374052, - "grad_norm": 2.3825111389160156, - "learning_rate": 1.7547163139376584e-05, - "loss": 0.5344, - "step": 220260 - }, - { - "epoch": 1.9472586148977173, - "grad_norm": 3.441697835922241, - "learning_rate": 1.7545689751704712e-05, - "loss": 0.6566, - "step": 220270 - }, - { - "epoch": 1.9473470181580297, - "grad_norm": 4.799928188323975, - "learning_rate": 1.754421636403284e-05, - "loss": 0.648, - "step": 220280 - }, - { - "epoch": 1.947435421418342, - "grad_norm": 1.052435278892517, - "learning_rate": 1.754274297636097e-05, - "loss": 0.6379, - "step": 220290 - }, - { - "epoch": 1.9475238246786541, - "grad_norm": 0.9867194294929504, - "learning_rate": 1.75412695886891e-05, - "loss": 0.5768, - "step": 220300 - }, - { - "epoch": 1.9476122279389663, - "grad_norm": 2.3401873111724854, - "learning_rate": 1.753979620101723e-05, - "loss": 0.5194, - "step": 220310 - }, - { - "epoch": 1.9477006311992786, - "grad_norm": 1.3275898694992065, - "learning_rate": 1.7538322813345357e-05, - "loss": 0.7023, - "step": 220320 - }, - { - "epoch": 1.947789034459591, - "grad_norm": 1.9870930910110474, - "learning_rate": 1.7536849425673486e-05, - "loss": 0.6657, - "step": 220330 - }, - { - "epoch": 1.947877437719903, - "grad_norm": 14.915205001831055, - "learning_rate": 1.7535376038001617e-05, - "loss": 0.6116, - "step": 220340 - }, - { - "epoch": 1.9479658409802152, - "grad_norm": 1.971383810043335, - "learning_rate": 1.7533902650329746e-05, - "loss": 0.5179, - "step": 220350 - }, - { - "epoch": 1.9480542442405278, - "grad_norm": 2.5229861736297607, - "learning_rate": 1.7532429262657874e-05, - "loss": 0.6418, - "step": 220360 - }, - { - "epoch": 1.9481426475008399, - "grad_norm": 2.2057082653045654, - "learning_rate": 1.7530955874986006e-05, - "loss": 0.4316, - "step": 220370 - }, - { - "epoch": 1.948231050761152, - "grad_norm": 4.470913887023926, - "learning_rate": 1.7529482487314134e-05, - "loss": 0.6664, - "step": 220380 - }, - { - "epoch": 1.9483194540214643, - "grad_norm": 3.529062271118164, - "learning_rate": 1.7528009099642263e-05, - "loss": 0.6037, - "step": 220390 - }, - { - "epoch": 1.9484078572817767, - "grad_norm": 0.8591119050979614, - "learning_rate": 1.752653571197039e-05, - "loss": 0.4202, - "step": 220400 - }, - { - "epoch": 1.9484962605420888, - "grad_norm": 9.83202838897705, - "learning_rate": 1.7525062324298523e-05, - "loss": 0.6812, - "step": 220410 - }, - { - "epoch": 1.948584663802401, - "grad_norm": 2.8193583488464355, - "learning_rate": 1.752358893662665e-05, - "loss": 0.5509, - "step": 220420 - }, - { - "epoch": 1.9486730670627133, - "grad_norm": 3.4376721382141113, - "learning_rate": 1.752211554895478e-05, - "loss": 0.5605, - "step": 220430 - }, - { - "epoch": 1.9487614703230256, - "grad_norm": 2.1072752475738525, - "learning_rate": 1.752064216128291e-05, - "loss": 0.5252, - "step": 220440 - }, - { - "epoch": 1.9488498735833377, - "grad_norm": 8.790304183959961, - "learning_rate": 1.751916877361104e-05, - "loss": 0.6145, - "step": 220450 - }, - { - "epoch": 1.9489382768436498, - "grad_norm": 3.855455160140991, - "learning_rate": 1.7517695385939168e-05, - "loss": 0.4925, - "step": 220460 - }, - { - "epoch": 1.9490266801039622, - "grad_norm": 1.7364224195480347, - "learning_rate": 1.7516221998267296e-05, - "loss": 0.6553, - "step": 220470 - }, - { - "epoch": 1.9491150833642745, - "grad_norm": 3.5123841762542725, - "learning_rate": 1.7514748610595428e-05, - "loss": 0.5795, - "step": 220480 - }, - { - "epoch": 1.9492034866245866, - "grad_norm": 2.9621994495391846, - "learning_rate": 1.7513275222923556e-05, - "loss": 0.7128, - "step": 220490 - }, - { - "epoch": 1.949291889884899, - "grad_norm": 7.281950950622559, - "learning_rate": 1.7511801835251685e-05, - "loss": 0.6006, - "step": 220500 - }, - { - "epoch": 1.9493802931452113, - "grad_norm": 2.602921962738037, - "learning_rate": 1.7510328447579813e-05, - "loss": 0.6524, - "step": 220510 - }, - { - "epoch": 1.9494686964055234, - "grad_norm": 4.122191429138184, - "learning_rate": 1.7508855059907945e-05, - "loss": 0.5878, - "step": 220520 - }, - { - "epoch": 1.9495570996658356, - "grad_norm": 1.875119924545288, - "learning_rate": 1.7507381672236073e-05, - "loss": 0.5572, - "step": 220530 - }, - { - "epoch": 1.949645502926148, - "grad_norm": 1.1922593116760254, - "learning_rate": 1.75059082845642e-05, - "loss": 0.5543, - "step": 220540 - }, - { - "epoch": 1.9497339061864603, - "grad_norm": 1.9271132946014404, - "learning_rate": 1.7504434896892333e-05, - "loss": 0.6123, - "step": 220550 - }, - { - "epoch": 1.9498223094467724, - "grad_norm": 3.6488096714019775, - "learning_rate": 1.750296150922046e-05, - "loss": 0.6235, - "step": 220560 - }, - { - "epoch": 1.9499107127070845, - "grad_norm": 4.910078525543213, - "learning_rate": 1.750148812154859e-05, - "loss": 0.4749, - "step": 220570 - }, - { - "epoch": 1.9499991159673968, - "grad_norm": 10.244205474853516, - "learning_rate": 1.7500014733876718e-05, - "loss": 0.5819, - "step": 220580 - }, - { - "epoch": 1.9500875192277092, - "grad_norm": 1.7231125831604004, - "learning_rate": 1.749854134620485e-05, - "loss": 0.5535, - "step": 220590 - }, - { - "epoch": 1.9501759224880213, - "grad_norm": 2.103482961654663, - "learning_rate": 1.7497067958532978e-05, - "loss": 0.7089, - "step": 220600 - }, - { - "epoch": 1.9502643257483336, - "grad_norm": 2.507673501968384, - "learning_rate": 1.7495594570861107e-05, - "loss": 0.6831, - "step": 220610 - }, - { - "epoch": 1.950352729008646, - "grad_norm": 3.5907340049743652, - "learning_rate": 1.749412118318924e-05, - "loss": 0.5588, - "step": 220620 - }, - { - "epoch": 1.950441132268958, - "grad_norm": 1.2048869132995605, - "learning_rate": 1.7492647795517367e-05, - "loss": 0.5469, - "step": 220630 - }, - { - "epoch": 1.9505295355292702, - "grad_norm": 3.259854793548584, - "learning_rate": 1.7491174407845495e-05, - "loss": 0.605, - "step": 220640 - }, - { - "epoch": 1.9506179387895826, - "grad_norm": 1.5435246229171753, - "learning_rate": 1.7489701020173623e-05, - "loss": 0.5402, - "step": 220650 - }, - { - "epoch": 1.950706342049895, - "grad_norm": 19.203760147094727, - "learning_rate": 1.7488227632501755e-05, - "loss": 0.5638, - "step": 220660 - }, - { - "epoch": 1.950794745310207, - "grad_norm": 3.113534450531006, - "learning_rate": 1.7486754244829884e-05, - "loss": 0.5273, - "step": 220670 - }, - { - "epoch": 1.9508831485705191, - "grad_norm": 2.984529733657837, - "learning_rate": 1.7485280857158012e-05, - "loss": 0.4242, - "step": 220680 - }, - { - "epoch": 1.9509715518308315, - "grad_norm": 2.876054286956787, - "learning_rate": 1.748380746948614e-05, - "loss": 0.606, - "step": 220690 - }, - { - "epoch": 1.9510599550911438, - "grad_norm": 7.738475799560547, - "learning_rate": 1.7482334081814272e-05, - "loss": 0.6156, - "step": 220700 - }, - { - "epoch": 1.951148358351456, - "grad_norm": 2.8049612045288086, - "learning_rate": 1.74808606941424e-05, - "loss": 0.6601, - "step": 220710 - }, - { - "epoch": 1.9512367616117683, - "grad_norm": 2.8683419227600098, - "learning_rate": 1.747938730647053e-05, - "loss": 0.5211, - "step": 220720 - }, - { - "epoch": 1.9513251648720806, - "grad_norm": 9.514555931091309, - "learning_rate": 1.747791391879866e-05, - "loss": 0.5206, - "step": 220730 - }, - { - "epoch": 1.9514135681323928, - "grad_norm": 12.106396675109863, - "learning_rate": 1.747644053112679e-05, - "loss": 0.5466, - "step": 220740 - }, - { - "epoch": 1.9515019713927049, - "grad_norm": 3.4347753524780273, - "learning_rate": 1.7474967143454917e-05, - "loss": 0.6684, - "step": 220750 - }, - { - "epoch": 1.9515903746530172, - "grad_norm": 7.132259368896484, - "learning_rate": 1.7473493755783046e-05, - "loss": 0.7184, - "step": 220760 - }, - { - "epoch": 1.9516787779133296, - "grad_norm": 5.191409111022949, - "learning_rate": 1.7472020368111177e-05, - "loss": 0.5463, - "step": 220770 - }, - { - "epoch": 1.9517671811736417, - "grad_norm": 1.6866577863693237, - "learning_rate": 1.7470546980439306e-05, - "loss": 0.5319, - "step": 220780 - }, - { - "epoch": 1.9518555844339538, - "grad_norm": 1.7636666297912598, - "learning_rate": 1.7469073592767434e-05, - "loss": 0.5774, - "step": 220790 - }, - { - "epoch": 1.9519439876942661, - "grad_norm": 6.350882530212402, - "learning_rate": 1.7467600205095562e-05, - "loss": 0.5517, - "step": 220800 - }, - { - "epoch": 1.9520323909545785, - "grad_norm": 0.730274498462677, - "learning_rate": 1.7466126817423694e-05, - "loss": 0.5857, - "step": 220810 - }, - { - "epoch": 1.9521207942148906, - "grad_norm": 2.098360776901245, - "learning_rate": 1.7464653429751822e-05, - "loss": 0.6221, - "step": 220820 - }, - { - "epoch": 1.952209197475203, - "grad_norm": 3.16597843170166, - "learning_rate": 1.746318004207995e-05, - "loss": 0.5988, - "step": 220830 - }, - { - "epoch": 1.9522976007355153, - "grad_norm": 1.323750376701355, - "learning_rate": 1.7461706654408083e-05, - "loss": 0.586, - "step": 220840 - }, - { - "epoch": 1.9523860039958274, - "grad_norm": 1.8122895956039429, - "learning_rate": 1.746023326673621e-05, - "loss": 0.5637, - "step": 220850 - }, - { - "epoch": 1.9524744072561395, - "grad_norm": 2.3582842350006104, - "learning_rate": 1.745875987906434e-05, - "loss": 0.6373, - "step": 220860 - }, - { - "epoch": 1.9525628105164519, - "grad_norm": 1.4971257448196411, - "learning_rate": 1.7457286491392468e-05, - "loss": 0.4774, - "step": 220870 - }, - { - "epoch": 1.9526512137767642, - "grad_norm": 1.4984142780303955, - "learning_rate": 1.74558131037206e-05, - "loss": 0.412, - "step": 220880 - }, - { - "epoch": 1.9527396170370763, - "grad_norm": 3.6662018299102783, - "learning_rate": 1.7454339716048728e-05, - "loss": 0.5171, - "step": 220890 - }, - { - "epoch": 1.9528280202973884, - "grad_norm": 4.355205535888672, - "learning_rate": 1.7452866328376856e-05, - "loss": 0.5553, - "step": 220900 - }, - { - "epoch": 1.9529164235577008, - "grad_norm": 13.22236156463623, - "learning_rate": 1.7451392940704988e-05, - "loss": 0.4429, - "step": 220910 - }, - { - "epoch": 1.9530048268180131, - "grad_norm": 2.350409746170044, - "learning_rate": 1.7449919553033116e-05, - "loss": 0.5378, - "step": 220920 - }, - { - "epoch": 1.9530932300783252, - "grad_norm": 1.5182199478149414, - "learning_rate": 1.7448446165361244e-05, - "loss": 0.574, - "step": 220930 - }, - { - "epoch": 1.9531816333386374, - "grad_norm": 2.9196348190307617, - "learning_rate": 1.7446972777689373e-05, - "loss": 0.6858, - "step": 220940 - }, - { - "epoch": 1.95327003659895, - "grad_norm": 4.065303325653076, - "learning_rate": 1.7445499390017505e-05, - "loss": 0.4565, - "step": 220950 - }, - { - "epoch": 1.953358439859262, - "grad_norm": 2.180522918701172, - "learning_rate": 1.7444026002345633e-05, - "loss": 0.5924, - "step": 220960 - }, - { - "epoch": 1.9534468431195742, - "grad_norm": 4.2608489990234375, - "learning_rate": 1.744255261467376e-05, - "loss": 0.6149, - "step": 220970 - }, - { - "epoch": 1.9535352463798865, - "grad_norm": 4.069046497344971, - "learning_rate": 1.7441079227001893e-05, - "loss": 0.5646, - "step": 220980 - }, - { - "epoch": 1.9536236496401989, - "grad_norm": 5.257110595703125, - "learning_rate": 1.743960583933002e-05, - "loss": 0.5123, - "step": 220990 - }, - { - "epoch": 1.953712052900511, - "grad_norm": 3.287450075149536, - "learning_rate": 1.743813245165815e-05, - "loss": 0.4468, - "step": 221000 - }, - { - "epoch": 1.953800456160823, - "grad_norm": 2.3431289196014404, - "learning_rate": 1.743665906398628e-05, - "loss": 0.4935, - "step": 221010 - }, - { - "epoch": 1.9538888594211354, - "grad_norm": 1.9732731580734253, - "learning_rate": 1.743518567631441e-05, - "loss": 0.655, - "step": 221020 - }, - { - "epoch": 1.9539772626814478, - "grad_norm": 2.1418793201446533, - "learning_rate": 1.7433712288642538e-05, - "loss": 0.638, - "step": 221030 - }, - { - "epoch": 1.95406566594176, - "grad_norm": 1.3343536853790283, - "learning_rate": 1.743223890097067e-05, - "loss": 0.4028, - "step": 221040 - }, - { - "epoch": 1.954154069202072, - "grad_norm": 4.15090274810791, - "learning_rate": 1.7430765513298798e-05, - "loss": 0.6298, - "step": 221050 - }, - { - "epoch": 1.9542424724623844, - "grad_norm": 3.681791305541992, - "learning_rate": 1.7429292125626927e-05, - "loss": 0.5714, - "step": 221060 - }, - { - "epoch": 1.9543308757226967, - "grad_norm": 2.90159010887146, - "learning_rate": 1.742781873795506e-05, - "loss": 0.6419, - "step": 221070 - }, - { - "epoch": 1.9544192789830088, - "grad_norm": 2.430762529373169, - "learning_rate": 1.7426345350283187e-05, - "loss": 0.6476, - "step": 221080 - }, - { - "epoch": 1.9545076822433212, - "grad_norm": 1.8313844203948975, - "learning_rate": 1.7424871962611315e-05, - "loss": 0.5767, - "step": 221090 - }, - { - "epoch": 1.9545960855036335, - "grad_norm": 4.6836090087890625, - "learning_rate": 1.7423398574939447e-05, - "loss": 0.6324, - "step": 221100 - }, - { - "epoch": 1.9546844887639456, - "grad_norm": 2.1411750316619873, - "learning_rate": 1.7421925187267575e-05, - "loss": 0.46, - "step": 221110 - }, - { - "epoch": 1.9547728920242577, - "grad_norm": 3.8862268924713135, - "learning_rate": 1.7420451799595704e-05, - "loss": 0.6116, - "step": 221120 - }, - { - "epoch": 1.95486129528457, - "grad_norm": 2.3128890991210938, - "learning_rate": 1.7418978411923835e-05, - "loss": 0.6273, - "step": 221130 - }, - { - "epoch": 1.9549496985448824, - "grad_norm": 2.4391748905181885, - "learning_rate": 1.7417505024251964e-05, - "loss": 0.5168, - "step": 221140 - }, - { - "epoch": 1.9550381018051946, - "grad_norm": 4.619393825531006, - "learning_rate": 1.7416031636580092e-05, - "loss": 0.598, - "step": 221150 - }, - { - "epoch": 1.9551265050655067, - "grad_norm": 2.1790719032287598, - "learning_rate": 1.741455824890822e-05, - "loss": 0.5018, - "step": 221160 - }, - { - "epoch": 1.955214908325819, - "grad_norm": 6.281453609466553, - "learning_rate": 1.7413084861236352e-05, - "loss": 0.7997, - "step": 221170 - }, - { - "epoch": 1.9553033115861314, - "grad_norm": 2.5911691188812256, - "learning_rate": 1.741161147356448e-05, - "loss": 0.5766, - "step": 221180 - }, - { - "epoch": 1.9553917148464435, - "grad_norm": 8.28908920288086, - "learning_rate": 1.741013808589261e-05, - "loss": 0.6161, - "step": 221190 - }, - { - "epoch": 1.9554801181067558, - "grad_norm": 2.1980628967285156, - "learning_rate": 1.740866469822074e-05, - "loss": 0.6146, - "step": 221200 - }, - { - "epoch": 1.9555685213670682, - "grad_norm": 2.1065611839294434, - "learning_rate": 1.740719131054887e-05, - "loss": 0.5509, - "step": 221210 - }, - { - "epoch": 1.9556569246273803, - "grad_norm": 6.382716655731201, - "learning_rate": 1.7405717922876997e-05, - "loss": 0.5751, - "step": 221220 - }, - { - "epoch": 1.9557453278876924, - "grad_norm": 7.217451095581055, - "learning_rate": 1.7404244535205126e-05, - "loss": 0.4805, - "step": 221230 - }, - { - "epoch": 1.9558337311480047, - "grad_norm": 11.226778984069824, - "learning_rate": 1.7402771147533257e-05, - "loss": 0.6936, - "step": 221240 - }, - { - "epoch": 1.955922134408317, - "grad_norm": 2.2104413509368896, - "learning_rate": 1.7401297759861386e-05, - "loss": 0.5766, - "step": 221250 - }, - { - "epoch": 1.9560105376686292, - "grad_norm": 1.5729948282241821, - "learning_rate": 1.7399824372189514e-05, - "loss": 0.574, - "step": 221260 - }, - { - "epoch": 1.9560989409289413, - "grad_norm": 0.9875640273094177, - "learning_rate": 1.7398350984517646e-05, - "loss": 0.6298, - "step": 221270 - }, - { - "epoch": 1.9561873441892537, - "grad_norm": 10.27600383758545, - "learning_rate": 1.7396877596845774e-05, - "loss": 0.5781, - "step": 221280 - }, - { - "epoch": 1.956275747449566, - "grad_norm": 2.2132785320281982, - "learning_rate": 1.7395404209173902e-05, - "loss": 0.6105, - "step": 221290 - }, - { - "epoch": 1.9563641507098781, - "grad_norm": 1.3178938627243042, - "learning_rate": 1.739393082150203e-05, - "loss": 0.4922, - "step": 221300 - }, - { - "epoch": 1.9564525539701905, - "grad_norm": 1.907183051109314, - "learning_rate": 1.7392457433830163e-05, - "loss": 0.5972, - "step": 221310 - }, - { - "epoch": 1.9565409572305028, - "grad_norm": 4.384500980377197, - "learning_rate": 1.739098404615829e-05, - "loss": 0.7256, - "step": 221320 - }, - { - "epoch": 1.956629360490815, - "grad_norm": 7.438844203948975, - "learning_rate": 1.738951065848642e-05, - "loss": 0.673, - "step": 221330 - }, - { - "epoch": 1.956717763751127, - "grad_norm": 2.2127161026000977, - "learning_rate": 1.7388037270814548e-05, - "loss": 0.595, - "step": 221340 - }, - { - "epoch": 1.9568061670114394, - "grad_norm": 6.308504581451416, - "learning_rate": 1.738656388314268e-05, - "loss": 0.5618, - "step": 221350 - }, - { - "epoch": 1.9568945702717517, - "grad_norm": 3.3190057277679443, - "learning_rate": 1.7385090495470808e-05, - "loss": 0.5663, - "step": 221360 - }, - { - "epoch": 1.9569829735320639, - "grad_norm": 2.126783609390259, - "learning_rate": 1.7383617107798936e-05, - "loss": 0.56, - "step": 221370 - }, - { - "epoch": 1.957071376792376, - "grad_norm": 6.160731315612793, - "learning_rate": 1.7382143720127068e-05, - "loss": 0.6325, - "step": 221380 - }, - { - "epoch": 1.9571597800526883, - "grad_norm": 2.9567158222198486, - "learning_rate": 1.7380670332455196e-05, - "loss": 0.6028, - "step": 221390 - }, - { - "epoch": 1.9572481833130007, - "grad_norm": 1.7168047428131104, - "learning_rate": 1.7379196944783325e-05, - "loss": 0.6372, - "step": 221400 - }, - { - "epoch": 1.9573365865733128, - "grad_norm": 1.3090665340423584, - "learning_rate": 1.7377723557111453e-05, - "loss": 0.5813, - "step": 221410 - }, - { - "epoch": 1.9574249898336251, - "grad_norm": 3.2118301391601562, - "learning_rate": 1.7376250169439585e-05, - "loss": 0.6302, - "step": 221420 - }, - { - "epoch": 1.9575133930939375, - "grad_norm": 3.053651809692383, - "learning_rate": 1.7374776781767713e-05, - "loss": 0.4884, - "step": 221430 - }, - { - "epoch": 1.9576017963542496, - "grad_norm": 2.939345359802246, - "learning_rate": 1.737330339409584e-05, - "loss": 0.5157, - "step": 221440 - }, - { - "epoch": 1.9576901996145617, - "grad_norm": 5.004700660705566, - "learning_rate": 1.737183000642397e-05, - "loss": 0.6239, - "step": 221450 - }, - { - "epoch": 1.957778602874874, - "grad_norm": 1.8308154344558716, - "learning_rate": 1.73703566187521e-05, - "loss": 0.4895, - "step": 221460 - }, - { - "epoch": 1.9578670061351864, - "grad_norm": 1.0355510711669922, - "learning_rate": 1.736888323108023e-05, - "loss": 0.5118, - "step": 221470 - }, - { - "epoch": 1.9579554093954985, - "grad_norm": 2.7190637588500977, - "learning_rate": 1.7367409843408358e-05, - "loss": 0.6767, - "step": 221480 - }, - { - "epoch": 1.9580438126558106, - "grad_norm": 2.793962001800537, - "learning_rate": 1.736593645573649e-05, - "loss": 0.5561, - "step": 221490 - }, - { - "epoch": 1.958132215916123, - "grad_norm": 2.219176769256592, - "learning_rate": 1.7364463068064618e-05, - "loss": 0.6688, - "step": 221500 - }, - { - "epoch": 1.9582206191764353, - "grad_norm": 2.296807050704956, - "learning_rate": 1.7362989680392747e-05, - "loss": 0.6831, - "step": 221510 - }, - { - "epoch": 1.9583090224367474, - "grad_norm": 2.7684853076934814, - "learning_rate": 1.7361516292720875e-05, - "loss": 0.5368, - "step": 221520 - }, - { - "epoch": 1.9583974256970595, - "grad_norm": 3.423884630203247, - "learning_rate": 1.7360042905049007e-05, - "loss": 0.516, - "step": 221530 - }, - { - "epoch": 1.958485828957372, - "grad_norm": 2.5242016315460205, - "learning_rate": 1.7358569517377135e-05, - "loss": 0.6295, - "step": 221540 - }, - { - "epoch": 1.9585742322176842, - "grad_norm": 4.421204566955566, - "learning_rate": 1.7357096129705263e-05, - "loss": 0.6851, - "step": 221550 - }, - { - "epoch": 1.9586626354779963, - "grad_norm": 4.011968612670898, - "learning_rate": 1.7355622742033395e-05, - "loss": 0.5472, - "step": 221560 - }, - { - "epoch": 1.9587510387383087, - "grad_norm": 1.3766950368881226, - "learning_rate": 1.7354149354361523e-05, - "loss": 0.4817, - "step": 221570 - }, - { - "epoch": 1.958839441998621, - "grad_norm": 2.665876865386963, - "learning_rate": 1.7352675966689652e-05, - "loss": 0.626, - "step": 221580 - }, - { - "epoch": 1.9589278452589332, - "grad_norm": 2.626681327819824, - "learning_rate": 1.735120257901778e-05, - "loss": 0.6421, - "step": 221590 - }, - { - "epoch": 1.9590162485192453, - "grad_norm": 3.987231492996216, - "learning_rate": 1.7349729191345912e-05, - "loss": 0.5362, - "step": 221600 - }, - { - "epoch": 1.9591046517795576, - "grad_norm": 4.009259223937988, - "learning_rate": 1.734825580367404e-05, - "loss": 0.5257, - "step": 221610 - }, - { - "epoch": 1.95919305503987, - "grad_norm": 1.906485676765442, - "learning_rate": 1.734678241600217e-05, - "loss": 0.4726, - "step": 221620 - }, - { - "epoch": 1.959281458300182, - "grad_norm": 1.893676996231079, - "learning_rate": 1.7345309028330297e-05, - "loss": 0.4433, - "step": 221630 - }, - { - "epoch": 1.9593698615604942, - "grad_norm": 1.9703915119171143, - "learning_rate": 1.734383564065843e-05, - "loss": 0.598, - "step": 221640 - }, - { - "epoch": 1.9594582648208065, - "grad_norm": 3.427255153656006, - "learning_rate": 1.7342362252986557e-05, - "loss": 0.7, - "step": 221650 - }, - { - "epoch": 1.9595466680811189, - "grad_norm": 4.3255486488342285, - "learning_rate": 1.7340888865314685e-05, - "loss": 0.5803, - "step": 221660 - }, - { - "epoch": 1.959635071341431, - "grad_norm": 2.629002094268799, - "learning_rate": 1.7339415477642817e-05, - "loss": 0.5961, - "step": 221670 - }, - { - "epoch": 1.9597234746017433, - "grad_norm": 1.9308866262435913, - "learning_rate": 1.7337942089970946e-05, - "loss": 0.6365, - "step": 221680 - }, - { - "epoch": 1.9598118778620557, - "grad_norm": 2.3796756267547607, - "learning_rate": 1.7336468702299074e-05, - "loss": 0.667, - "step": 221690 - }, - { - "epoch": 1.9599002811223678, - "grad_norm": 4.674548149108887, - "learning_rate": 1.7334995314627202e-05, - "loss": 0.617, - "step": 221700 - }, - { - "epoch": 1.95998868438268, - "grad_norm": 2.9246327877044678, - "learning_rate": 1.7333521926955334e-05, - "loss": 0.619, - "step": 221710 - }, - { - "epoch": 1.9600770876429923, - "grad_norm": 1.9208478927612305, - "learning_rate": 1.7332048539283462e-05, - "loss": 0.6139, - "step": 221720 - }, - { - "epoch": 1.9601654909033046, - "grad_norm": 3.7790932655334473, - "learning_rate": 1.733057515161159e-05, - "loss": 0.6783, - "step": 221730 - }, - { - "epoch": 1.9602538941636167, - "grad_norm": 1.993432879447937, - "learning_rate": 1.7329101763939722e-05, - "loss": 0.5181, - "step": 221740 - }, - { - "epoch": 1.9603422974239288, - "grad_norm": 2.6456339359283447, - "learning_rate": 1.732762837626785e-05, - "loss": 0.5437, - "step": 221750 - }, - { - "epoch": 1.9604307006842412, - "grad_norm": 4.101899147033691, - "learning_rate": 1.732615498859598e-05, - "loss": 0.6074, - "step": 221760 - }, - { - "epoch": 1.9605191039445535, - "grad_norm": 1.424377202987671, - "learning_rate": 1.7324681600924108e-05, - "loss": 0.4778, - "step": 221770 - }, - { - "epoch": 1.9606075072048657, - "grad_norm": 4.055168151855469, - "learning_rate": 1.732320821325224e-05, - "loss": 0.4997, - "step": 221780 - }, - { - "epoch": 1.960695910465178, - "grad_norm": 3.0688679218292236, - "learning_rate": 1.7321734825580368e-05, - "loss": 0.6575, - "step": 221790 - }, - { - "epoch": 1.9607843137254903, - "grad_norm": 2.3685131072998047, - "learning_rate": 1.7320261437908496e-05, - "loss": 0.6747, - "step": 221800 - }, - { - "epoch": 1.9608727169858025, - "grad_norm": 5.343352794647217, - "learning_rate": 1.7318788050236624e-05, - "loss": 0.6151, - "step": 221810 - }, - { - "epoch": 1.9609611202461146, - "grad_norm": 4.508944988250732, - "learning_rate": 1.7317314662564756e-05, - "loss": 0.5315, - "step": 221820 - }, - { - "epoch": 1.961049523506427, - "grad_norm": 3.288653612136841, - "learning_rate": 1.7315841274892884e-05, - "loss": 0.7137, - "step": 221830 - }, - { - "epoch": 1.9611379267667393, - "grad_norm": 3.245378255844116, - "learning_rate": 1.7314367887221013e-05, - "loss": 0.4592, - "step": 221840 - }, - { - "epoch": 1.9612263300270514, - "grad_norm": 2.223806858062744, - "learning_rate": 1.7312894499549145e-05, - "loss": 0.5834, - "step": 221850 - }, - { - "epoch": 1.9613147332873635, - "grad_norm": 2.324831962585449, - "learning_rate": 1.7311421111877273e-05, - "loss": 0.4512, - "step": 221860 - }, - { - "epoch": 1.9614031365476758, - "grad_norm": 6.371609687805176, - "learning_rate": 1.73099477242054e-05, - "loss": 0.5267, - "step": 221870 - }, - { - "epoch": 1.9614915398079882, - "grad_norm": 1.8866710662841797, - "learning_rate": 1.730847433653353e-05, - "loss": 0.5281, - "step": 221880 - }, - { - "epoch": 1.9615799430683003, - "grad_norm": 1.9318867921829224, - "learning_rate": 1.730700094886166e-05, - "loss": 0.5328, - "step": 221890 - }, - { - "epoch": 1.9616683463286126, - "grad_norm": 2.1442458629608154, - "learning_rate": 1.730552756118979e-05, - "loss": 0.6054, - "step": 221900 - }, - { - "epoch": 1.961756749588925, - "grad_norm": 3.8569774627685547, - "learning_rate": 1.7304054173517918e-05, - "loss": 0.7032, - "step": 221910 - }, - { - "epoch": 1.961845152849237, - "grad_norm": 3.075002431869507, - "learning_rate": 1.730258078584605e-05, - "loss": 0.595, - "step": 221920 - }, - { - "epoch": 1.9619335561095492, - "grad_norm": 8.860819816589355, - "learning_rate": 1.7301107398174178e-05, - "loss": 0.5075, - "step": 221930 - }, - { - "epoch": 1.9620219593698616, - "grad_norm": 3.092060089111328, - "learning_rate": 1.7299634010502306e-05, - "loss": 0.4836, - "step": 221940 - }, - { - "epoch": 1.962110362630174, - "grad_norm": 1.856972575187683, - "learning_rate": 1.7298160622830438e-05, - "loss": 0.479, - "step": 221950 - }, - { - "epoch": 1.962198765890486, - "grad_norm": 1.5563569068908691, - "learning_rate": 1.7296687235158567e-05, - "loss": 0.6582, - "step": 221960 - }, - { - "epoch": 1.9622871691507981, - "grad_norm": 0.956799328327179, - "learning_rate": 1.7295213847486695e-05, - "loss": 0.529, - "step": 221970 - }, - { - "epoch": 1.9623755724111105, - "grad_norm": 3.0957534313201904, - "learning_rate": 1.7293740459814827e-05, - "loss": 0.6286, - "step": 221980 - }, - { - "epoch": 1.9624639756714228, - "grad_norm": 1.7938803434371948, - "learning_rate": 1.7292267072142955e-05, - "loss": 0.6059, - "step": 221990 - }, - { - "epoch": 1.962552378931735, - "grad_norm": 1.8811876773834229, - "learning_rate": 1.7290793684471083e-05, - "loss": 0.5837, - "step": 222000 - }, - { - "epoch": 1.9626407821920473, - "grad_norm": 4.525232791900635, - "learning_rate": 1.7289320296799215e-05, - "loss": 0.5446, - "step": 222010 - }, - { - "epoch": 1.9627291854523596, - "grad_norm": 1.8074058294296265, - "learning_rate": 1.7287846909127343e-05, - "loss": 0.4753, - "step": 222020 - }, - { - "epoch": 1.9628175887126718, - "grad_norm": 5.129820346832275, - "learning_rate": 1.7286373521455472e-05, - "loss": 0.6748, - "step": 222030 - }, - { - "epoch": 1.9629059919729839, - "grad_norm": 8.805209159851074, - "learning_rate": 1.7284900133783604e-05, - "loss": 0.6701, - "step": 222040 - }, - { - "epoch": 1.9629943952332962, - "grad_norm": 2.0817229747772217, - "learning_rate": 1.7283426746111732e-05, - "loss": 0.5398, - "step": 222050 - }, - { - "epoch": 1.9630827984936086, - "grad_norm": 2.0642473697662354, - "learning_rate": 1.728195335843986e-05, - "loss": 0.5827, - "step": 222060 - }, - { - "epoch": 1.9631712017539207, - "grad_norm": 2.650526762008667, - "learning_rate": 1.7280479970767992e-05, - "loss": 0.6029, - "step": 222070 - }, - { - "epoch": 1.9632596050142328, - "grad_norm": 0.8851326704025269, - "learning_rate": 1.727900658309612e-05, - "loss": 0.5382, - "step": 222080 - }, - { - "epoch": 1.9633480082745451, - "grad_norm": 6.130293846130371, - "learning_rate": 1.727753319542425e-05, - "loss": 0.4914, - "step": 222090 - }, - { - "epoch": 1.9634364115348575, - "grad_norm": 1.810418963432312, - "learning_rate": 1.7276059807752377e-05, - "loss": 0.5117, - "step": 222100 - }, - { - "epoch": 1.9635248147951696, - "grad_norm": 2.6192986965179443, - "learning_rate": 1.727458642008051e-05, - "loss": 0.5139, - "step": 222110 - }, - { - "epoch": 1.9636132180554817, - "grad_norm": 1.8537731170654297, - "learning_rate": 1.7273113032408637e-05, - "loss": 0.573, - "step": 222120 - }, - { - "epoch": 1.9637016213157943, - "grad_norm": 2.8963286876678467, - "learning_rate": 1.7271639644736766e-05, - "loss": 0.6338, - "step": 222130 - }, - { - "epoch": 1.9637900245761064, - "grad_norm": 3.3957526683807373, - "learning_rate": 1.7270166257064897e-05, - "loss": 0.6349, - "step": 222140 - }, - { - "epoch": 1.9638784278364185, - "grad_norm": 0.9304403066635132, - "learning_rate": 1.7268692869393026e-05, - "loss": 0.5048, - "step": 222150 - }, - { - "epoch": 1.9639668310967309, - "grad_norm": 1.9414174556732178, - "learning_rate": 1.7267219481721154e-05, - "loss": 0.4382, - "step": 222160 - }, - { - "epoch": 1.9640552343570432, - "grad_norm": 5.736922740936279, - "learning_rate": 1.7265746094049282e-05, - "loss": 0.4992, - "step": 222170 - }, - { - "epoch": 1.9641436376173553, - "grad_norm": 1.3160322904586792, - "learning_rate": 1.7264272706377414e-05, - "loss": 0.6757, - "step": 222180 - }, - { - "epoch": 1.9642320408776675, - "grad_norm": 4.047842025756836, - "learning_rate": 1.7262799318705542e-05, - "loss": 0.5021, - "step": 222190 - }, - { - "epoch": 1.9643204441379798, - "grad_norm": 3.707914113998413, - "learning_rate": 1.726132593103367e-05, - "loss": 0.5876, - "step": 222200 - }, - { - "epoch": 1.9644088473982921, - "grad_norm": 1.5215938091278076, - "learning_rate": 1.7259852543361803e-05, - "loss": 0.5758, - "step": 222210 - }, - { - "epoch": 1.9644972506586043, - "grad_norm": 5.436439037322998, - "learning_rate": 1.725837915568993e-05, - "loss": 0.5506, - "step": 222220 - }, - { - "epoch": 1.9645856539189164, - "grad_norm": 1.7833653688430786, - "learning_rate": 1.725690576801806e-05, - "loss": 0.6581, - "step": 222230 - }, - { - "epoch": 1.9646740571792287, - "grad_norm": 22.85245132446289, - "learning_rate": 1.7255432380346188e-05, - "loss": 0.6773, - "step": 222240 - }, - { - "epoch": 1.964762460439541, - "grad_norm": 2.7302658557891846, - "learning_rate": 1.725395899267432e-05, - "loss": 0.5082, - "step": 222250 - }, - { - "epoch": 1.9648508636998532, - "grad_norm": 10.478650093078613, - "learning_rate": 1.7252485605002448e-05, - "loss": 0.5939, - "step": 222260 - }, - { - "epoch": 1.9649392669601655, - "grad_norm": 1.1871991157531738, - "learning_rate": 1.7251012217330576e-05, - "loss": 0.6735, - "step": 222270 - }, - { - "epoch": 1.9650276702204779, - "grad_norm": 1.9310252666473389, - "learning_rate": 1.7249538829658704e-05, - "loss": 0.6796, - "step": 222280 - }, - { - "epoch": 1.96511607348079, - "grad_norm": 7.742509841918945, - "learning_rate": 1.7248065441986836e-05, - "loss": 0.6949, - "step": 222290 - }, - { - "epoch": 1.965204476741102, - "grad_norm": 4.2397780418396, - "learning_rate": 1.7246592054314964e-05, - "loss": 0.4619, - "step": 222300 - }, - { - "epoch": 1.9652928800014144, - "grad_norm": 2.080115795135498, - "learning_rate": 1.7245118666643093e-05, - "loss": 0.4737, - "step": 222310 - }, - { - "epoch": 1.9653812832617268, - "grad_norm": 5.406563758850098, - "learning_rate": 1.7243645278971225e-05, - "loss": 0.5169, - "step": 222320 - }, - { - "epoch": 1.965469686522039, - "grad_norm": 2.611933469772339, - "learning_rate": 1.7242171891299353e-05, - "loss": 0.5915, - "step": 222330 - }, - { - "epoch": 1.965558089782351, - "grad_norm": 2.100005626678467, - "learning_rate": 1.724069850362748e-05, - "loss": 0.5918, - "step": 222340 - }, - { - "epoch": 1.9656464930426634, - "grad_norm": 8.699605941772461, - "learning_rate": 1.723922511595561e-05, - "loss": 0.6256, - "step": 222350 - }, - { - "epoch": 1.9657348963029757, - "grad_norm": 4.119903087615967, - "learning_rate": 1.723775172828374e-05, - "loss": 0.5898, - "step": 222360 - }, - { - "epoch": 1.9658232995632878, - "grad_norm": 2.810309410095215, - "learning_rate": 1.723627834061187e-05, - "loss": 0.5349, - "step": 222370 - }, - { - "epoch": 1.9659117028236002, - "grad_norm": 8.267805099487305, - "learning_rate": 1.7234804952939998e-05, - "loss": 0.6738, - "step": 222380 - }, - { - "epoch": 1.9660001060839125, - "grad_norm": 2.084251642227173, - "learning_rate": 1.7233331565268126e-05, - "loss": 0.7415, - "step": 222390 - }, - { - "epoch": 1.9660885093442246, - "grad_norm": 1.9852428436279297, - "learning_rate": 1.7231858177596258e-05, - "loss": 0.5328, - "step": 222400 - }, - { - "epoch": 1.9661769126045368, - "grad_norm": 1.7430096864700317, - "learning_rate": 1.7230384789924387e-05, - "loss": 0.5559, - "step": 222410 - }, - { - "epoch": 1.966265315864849, - "grad_norm": 2.324712038040161, - "learning_rate": 1.7228911402252515e-05, - "loss": 0.6603, - "step": 222420 - }, - { - "epoch": 1.9663537191251614, - "grad_norm": 3.147503614425659, - "learning_rate": 1.7227438014580647e-05, - "loss": 0.5688, - "step": 222430 - }, - { - "epoch": 1.9664421223854736, - "grad_norm": 4.333643913269043, - "learning_rate": 1.7225964626908775e-05, - "loss": 0.5561, - "step": 222440 - }, - { - "epoch": 1.9665305256457857, - "grad_norm": 4.8724894523620605, - "learning_rate": 1.7224491239236903e-05, - "loss": 0.4943, - "step": 222450 - }, - { - "epoch": 1.966618928906098, - "grad_norm": 7.888250350952148, - "learning_rate": 1.7223017851565032e-05, - "loss": 0.5848, - "step": 222460 - }, - { - "epoch": 1.9667073321664104, - "grad_norm": 7.013293743133545, - "learning_rate": 1.7221544463893163e-05, - "loss": 0.4606, - "step": 222470 - }, - { - "epoch": 1.9667957354267225, - "grad_norm": 1.9578282833099365, - "learning_rate": 1.7220071076221292e-05, - "loss": 0.5711, - "step": 222480 - }, - { - "epoch": 1.9668841386870348, - "grad_norm": 3.4705073833465576, - "learning_rate": 1.721859768854942e-05, - "loss": 0.6787, - "step": 222490 - }, - { - "epoch": 1.9669725419473472, - "grad_norm": 1.9974721670150757, - "learning_rate": 1.7217124300877552e-05, - "loss": 0.6046, - "step": 222500 - }, - { - "epoch": 1.9670609452076593, - "grad_norm": 1.1950721740722656, - "learning_rate": 1.721565091320568e-05, - "loss": 0.5288, - "step": 222510 - }, - { - "epoch": 1.9671493484679714, - "grad_norm": 3.3858697414398193, - "learning_rate": 1.721417752553381e-05, - "loss": 0.5076, - "step": 222520 - }, - { - "epoch": 1.9672377517282837, - "grad_norm": 2.76285457611084, - "learning_rate": 1.7212704137861937e-05, - "loss": 0.5661, - "step": 222530 - }, - { - "epoch": 1.967326154988596, - "grad_norm": 3.6475632190704346, - "learning_rate": 1.721123075019007e-05, - "loss": 0.6155, - "step": 222540 - }, - { - "epoch": 1.9674145582489082, - "grad_norm": 1.3591192960739136, - "learning_rate": 1.7209757362518197e-05, - "loss": 0.5272, - "step": 222550 - }, - { - "epoch": 1.9675029615092203, - "grad_norm": 2.6487836837768555, - "learning_rate": 1.7208283974846325e-05, - "loss": 0.5823, - "step": 222560 - }, - { - "epoch": 1.9675913647695327, - "grad_norm": 1.8409322500228882, - "learning_rate": 1.7206810587174454e-05, - "loss": 0.6024, - "step": 222570 - }, - { - "epoch": 1.967679768029845, - "grad_norm": 4.987606048583984, - "learning_rate": 1.7205337199502586e-05, - "loss": 0.5538, - "step": 222580 - }, - { - "epoch": 1.9677681712901571, - "grad_norm": 7.849449157714844, - "learning_rate": 1.7203863811830714e-05, - "loss": 0.5508, - "step": 222590 - }, - { - "epoch": 1.9678565745504695, - "grad_norm": 2.5497729778289795, - "learning_rate": 1.7202390424158842e-05, - "loss": 0.4697, - "step": 222600 - }, - { - "epoch": 1.9679449778107818, - "grad_norm": 5.652052879333496, - "learning_rate": 1.7200917036486974e-05, - "loss": 0.5289, - "step": 222610 - }, - { - "epoch": 1.968033381071094, - "grad_norm": 6.274253845214844, - "learning_rate": 1.7199443648815102e-05, - "loss": 0.5764, - "step": 222620 - }, - { - "epoch": 1.968121784331406, - "grad_norm": 3.169924020767212, - "learning_rate": 1.719797026114323e-05, - "loss": 0.6174, - "step": 222630 - }, - { - "epoch": 1.9682101875917184, - "grad_norm": 5.025245189666748, - "learning_rate": 1.719649687347136e-05, - "loss": 0.5409, - "step": 222640 - }, - { - "epoch": 1.9682985908520307, - "grad_norm": 1.7776600122451782, - "learning_rate": 1.719502348579949e-05, - "loss": 0.6629, - "step": 222650 - }, - { - "epoch": 1.9683869941123429, - "grad_norm": 2.793548822402954, - "learning_rate": 1.719355009812762e-05, - "loss": 0.6304, - "step": 222660 - }, - { - "epoch": 1.968475397372655, - "grad_norm": 3.562716007232666, - "learning_rate": 1.7192076710455747e-05, - "loss": 0.5184, - "step": 222670 - }, - { - "epoch": 1.9685638006329673, - "grad_norm": 2.9733078479766846, - "learning_rate": 1.719060332278388e-05, - "loss": 0.5315, - "step": 222680 - }, - { - "epoch": 1.9686522038932797, - "grad_norm": 6.737588882446289, - "learning_rate": 1.7189129935112008e-05, - "loss": 0.601, - "step": 222690 - }, - { - "epoch": 1.9687406071535918, - "grad_norm": 2.0825278759002686, - "learning_rate": 1.7187656547440136e-05, - "loss": 0.6412, - "step": 222700 - }, - { - "epoch": 1.968829010413904, - "grad_norm": 2.5049214363098145, - "learning_rate": 1.7186183159768264e-05, - "loss": 0.6238, - "step": 222710 - }, - { - "epoch": 1.9689174136742165, - "grad_norm": 5.799861907958984, - "learning_rate": 1.7184709772096396e-05, - "loss": 0.5981, - "step": 222720 - }, - { - "epoch": 1.9690058169345286, - "grad_norm": 1.855312466621399, - "learning_rate": 1.7183236384424524e-05, - "loss": 0.4881, - "step": 222730 - }, - { - "epoch": 1.9690942201948407, - "grad_norm": 2.7078020572662354, - "learning_rate": 1.7181762996752653e-05, - "loss": 0.6108, - "step": 222740 - }, - { - "epoch": 1.969182623455153, - "grad_norm": 2.7934305667877197, - "learning_rate": 1.718028960908078e-05, - "loss": 0.543, - "step": 222750 - }, - { - "epoch": 1.9692710267154654, - "grad_norm": 2.9295403957366943, - "learning_rate": 1.7178816221408913e-05, - "loss": 0.6324, - "step": 222760 - }, - { - "epoch": 1.9693594299757775, - "grad_norm": 1.3017882108688354, - "learning_rate": 1.717734283373704e-05, - "loss": 0.4405, - "step": 222770 - }, - { - "epoch": 1.9694478332360896, - "grad_norm": 1.115747332572937, - "learning_rate": 1.717586944606517e-05, - "loss": 0.5383, - "step": 222780 - }, - { - "epoch": 1.969536236496402, - "grad_norm": 2.6722826957702637, - "learning_rate": 1.71743960583933e-05, - "loss": 0.5871, - "step": 222790 - }, - { - "epoch": 1.9696246397567143, - "grad_norm": 3.0671226978302, - "learning_rate": 1.717292267072143e-05, - "loss": 0.5741, - "step": 222800 - }, - { - "epoch": 1.9697130430170264, - "grad_norm": 1.6251859664916992, - "learning_rate": 1.7171449283049558e-05, - "loss": 0.6286, - "step": 222810 - }, - { - "epoch": 1.9698014462773386, - "grad_norm": 1.8720160722732544, - "learning_rate": 1.7169975895377686e-05, - "loss": 0.6504, - "step": 222820 - }, - { - "epoch": 1.969889849537651, - "grad_norm": 6.490540981292725, - "learning_rate": 1.7168502507705818e-05, - "loss": 0.6905, - "step": 222830 - }, - { - "epoch": 1.9699782527979632, - "grad_norm": 7.432729721069336, - "learning_rate": 1.7167029120033946e-05, - "loss": 0.6618, - "step": 222840 - }, - { - "epoch": 1.9700666560582754, - "grad_norm": 1.296164631843567, - "learning_rate": 1.7165555732362075e-05, - "loss": 0.5143, - "step": 222850 - }, - { - "epoch": 1.9701550593185877, - "grad_norm": 3.0250802040100098, - "learning_rate": 1.7164082344690207e-05, - "loss": 0.5507, - "step": 222860 - }, - { - "epoch": 1.9702434625789, - "grad_norm": 3.119205951690674, - "learning_rate": 1.7162608957018335e-05, - "loss": 0.6189, - "step": 222870 - }, - { - "epoch": 1.9703318658392122, - "grad_norm": 4.983765602111816, - "learning_rate": 1.7161135569346463e-05, - "loss": 0.5083, - "step": 222880 - }, - { - "epoch": 1.9704202690995243, - "grad_norm": 14.583525657653809, - "learning_rate": 1.7159662181674595e-05, - "loss": 0.4849, - "step": 222890 - }, - { - "epoch": 1.9705086723598366, - "grad_norm": 2.0588018894195557, - "learning_rate": 1.7158188794002723e-05, - "loss": 0.5834, - "step": 222900 - }, - { - "epoch": 1.970597075620149, - "grad_norm": 1.2134294509887695, - "learning_rate": 1.715671540633085e-05, - "loss": 0.5814, - "step": 222910 - }, - { - "epoch": 1.970685478880461, - "grad_norm": 3.06860089302063, - "learning_rate": 1.7155242018658983e-05, - "loss": 0.5837, - "step": 222920 - }, - { - "epoch": 1.9707738821407732, - "grad_norm": 2.7625577449798584, - "learning_rate": 1.7153768630987112e-05, - "loss": 0.6332, - "step": 222930 - }, - { - "epoch": 1.9708622854010855, - "grad_norm": 1.0985058546066284, - "learning_rate": 1.715229524331524e-05, - "loss": 0.393, - "step": 222940 - }, - { - "epoch": 1.9709506886613979, - "grad_norm": 6.152040958404541, - "learning_rate": 1.7150821855643372e-05, - "loss": 0.621, - "step": 222950 - }, - { - "epoch": 1.97103909192171, - "grad_norm": 10.569106101989746, - "learning_rate": 1.71493484679715e-05, - "loss": 0.5013, - "step": 222960 - }, - { - "epoch": 1.9711274951820223, - "grad_norm": 4.52792501449585, - "learning_rate": 1.714787508029963e-05, - "loss": 0.5851, - "step": 222970 - }, - { - "epoch": 1.9712158984423347, - "grad_norm": 5.344310283660889, - "learning_rate": 1.714640169262776e-05, - "loss": 0.6643, - "step": 222980 - }, - { - "epoch": 1.9713043017026468, - "grad_norm": 5.182750225067139, - "learning_rate": 1.714492830495589e-05, - "loss": 0.6485, - "step": 222990 - }, - { - "epoch": 1.971392704962959, - "grad_norm": 6.226066589355469, - "learning_rate": 1.7143454917284017e-05, - "loss": 0.5357, - "step": 223000 - }, - { - "epoch": 1.9714811082232713, - "grad_norm": 2.8686330318450928, - "learning_rate": 1.714198152961215e-05, - "loss": 0.5842, - "step": 223010 - }, - { - "epoch": 1.9715695114835836, - "grad_norm": 1.1921875476837158, - "learning_rate": 1.7140508141940277e-05, - "loss": 0.5514, - "step": 223020 - }, - { - "epoch": 1.9716579147438957, - "grad_norm": 1.88923978805542, - "learning_rate": 1.7139034754268405e-05, - "loss": 0.4687, - "step": 223030 - }, - { - "epoch": 1.9717463180042079, - "grad_norm": 1.6415197849273682, - "learning_rate": 1.7137561366596534e-05, - "loss": 0.6425, - "step": 223040 - }, - { - "epoch": 1.9718347212645202, - "grad_norm": 1.456709384918213, - "learning_rate": 1.7136087978924666e-05, - "loss": 0.5695, - "step": 223050 - }, - { - "epoch": 1.9719231245248325, - "grad_norm": 6.440140247344971, - "learning_rate": 1.7134614591252794e-05, - "loss": 0.6765, - "step": 223060 - }, - { - "epoch": 1.9720115277851447, - "grad_norm": 1.7574011087417603, - "learning_rate": 1.7133141203580922e-05, - "loss": 0.5549, - "step": 223070 - }, - { - "epoch": 1.972099931045457, - "grad_norm": 4.2524333000183105, - "learning_rate": 1.7131667815909054e-05, - "loss": 0.5492, - "step": 223080 - }, - { - "epoch": 1.9721883343057693, - "grad_norm": 1.1085690259933472, - "learning_rate": 1.7130194428237182e-05, - "loss": 0.5494, - "step": 223090 - }, - { - "epoch": 1.9722767375660815, - "grad_norm": 3.139369010925293, - "learning_rate": 1.712872104056531e-05, - "loss": 0.6718, - "step": 223100 - }, - { - "epoch": 1.9723651408263936, - "grad_norm": 3.9366915225982666, - "learning_rate": 1.712724765289344e-05, - "loss": 0.5698, - "step": 223110 - }, - { - "epoch": 1.972453544086706, - "grad_norm": 6.333446502685547, - "learning_rate": 1.712577426522157e-05, - "loss": 0.6644, - "step": 223120 - }, - { - "epoch": 1.9725419473470183, - "grad_norm": 1.8347561359405518, - "learning_rate": 1.71243008775497e-05, - "loss": 0.4329, - "step": 223130 - }, - { - "epoch": 1.9726303506073304, - "grad_norm": 2.3457577228546143, - "learning_rate": 1.7122827489877828e-05, - "loss": 0.5492, - "step": 223140 - }, - { - "epoch": 1.9727187538676425, - "grad_norm": 1.201335072517395, - "learning_rate": 1.712135410220596e-05, - "loss": 0.5605, - "step": 223150 - }, - { - "epoch": 1.9728071571279548, - "grad_norm": 4.401676654815674, - "learning_rate": 1.7119880714534088e-05, - "loss": 0.5035, - "step": 223160 - }, - { - "epoch": 1.9728955603882672, - "grad_norm": 2.910940408706665, - "learning_rate": 1.7118407326862216e-05, - "loss": 0.6122, - "step": 223170 - }, - { - "epoch": 1.9729839636485793, - "grad_norm": 8.407724380493164, - "learning_rate": 1.7116933939190344e-05, - "loss": 0.5584, - "step": 223180 - }, - { - "epoch": 1.9730723669088917, - "grad_norm": 11.145696640014648, - "learning_rate": 1.7115460551518476e-05, - "loss": 0.5517, - "step": 223190 - }, - { - "epoch": 1.973160770169204, - "grad_norm": 11.784648895263672, - "learning_rate": 1.7113987163846604e-05, - "loss": 0.6739, - "step": 223200 - }, - { - "epoch": 1.9732491734295161, - "grad_norm": 5.412535190582275, - "learning_rate": 1.7112513776174733e-05, - "loss": 0.5425, - "step": 223210 - }, - { - "epoch": 1.9733375766898282, - "grad_norm": 3.030336618423462, - "learning_rate": 1.711104038850286e-05, - "loss": 0.5982, - "step": 223220 - }, - { - "epoch": 1.9734259799501406, - "grad_norm": 1.529349446296692, - "learning_rate": 1.7109567000830993e-05, - "loss": 0.4851, - "step": 223230 - }, - { - "epoch": 1.973514383210453, - "grad_norm": 1.9232240915298462, - "learning_rate": 1.710809361315912e-05, - "loss": 0.6172, - "step": 223240 - }, - { - "epoch": 1.973602786470765, - "grad_norm": 6.125307559967041, - "learning_rate": 1.710662022548725e-05, - "loss": 0.6683, - "step": 223250 - }, - { - "epoch": 1.9736911897310772, - "grad_norm": 1.5917388200759888, - "learning_rate": 1.710514683781538e-05, - "loss": 0.6482, - "step": 223260 - }, - { - "epoch": 1.9737795929913895, - "grad_norm": 2.928166389465332, - "learning_rate": 1.710367345014351e-05, - "loss": 0.5313, - "step": 223270 - }, - { - "epoch": 1.9738679962517018, - "grad_norm": 8.321270942687988, - "learning_rate": 1.7102200062471638e-05, - "loss": 0.5604, - "step": 223280 - }, - { - "epoch": 1.973956399512014, - "grad_norm": 2.138234853744507, - "learning_rate": 1.7100726674799766e-05, - "loss": 0.6185, - "step": 223290 - }, - { - "epoch": 1.974044802772326, - "grad_norm": 3.251054525375366, - "learning_rate": 1.7099253287127898e-05, - "loss": 0.5883, - "step": 223300 - }, - { - "epoch": 1.9741332060326386, - "grad_norm": 5.7253594398498535, - "learning_rate": 1.7097779899456026e-05, - "loss": 0.6261, - "step": 223310 - }, - { - "epoch": 1.9742216092929508, - "grad_norm": 7.327157974243164, - "learning_rate": 1.7096306511784155e-05, - "loss": 0.6169, - "step": 223320 - }, - { - "epoch": 1.9743100125532629, - "grad_norm": 2.8174843788146973, - "learning_rate": 1.7094833124112287e-05, - "loss": 0.7236, - "step": 223330 - }, - { - "epoch": 1.9743984158135752, - "grad_norm": 5.326597690582275, - "learning_rate": 1.7093359736440415e-05, - "loss": 0.6744, - "step": 223340 - }, - { - "epoch": 1.9744868190738876, - "grad_norm": 2.247544050216675, - "learning_rate": 1.7091886348768543e-05, - "loss": 0.5186, - "step": 223350 - }, - { - "epoch": 1.9745752223341997, - "grad_norm": 1.2743721008300781, - "learning_rate": 1.709041296109667e-05, - "loss": 0.6286, - "step": 223360 - }, - { - "epoch": 1.9746636255945118, - "grad_norm": 5.933443546295166, - "learning_rate": 1.7088939573424803e-05, - "loss": 0.6492, - "step": 223370 - }, - { - "epoch": 1.9747520288548241, - "grad_norm": 2.982179880142212, - "learning_rate": 1.7087466185752932e-05, - "loss": 0.6541, - "step": 223380 - }, - { - "epoch": 1.9748404321151365, - "grad_norm": 10.137446403503418, - "learning_rate": 1.708599279808106e-05, - "loss": 0.6684, - "step": 223390 - }, - { - "epoch": 1.9749288353754486, - "grad_norm": 1.6906108856201172, - "learning_rate": 1.708451941040919e-05, - "loss": 0.5938, - "step": 223400 - }, - { - "epoch": 1.9750172386357607, - "grad_norm": 5.015181064605713, - "learning_rate": 1.708304602273732e-05, - "loss": 0.5494, - "step": 223410 - }, - { - "epoch": 1.975105641896073, - "grad_norm": 3.6553337574005127, - "learning_rate": 1.708157263506545e-05, - "loss": 0.4865, - "step": 223420 - }, - { - "epoch": 1.9751940451563854, - "grad_norm": 2.070610523223877, - "learning_rate": 1.7080099247393577e-05, - "loss": 0.5332, - "step": 223430 - }, - { - "epoch": 1.9752824484166975, - "grad_norm": 3.7528462409973145, - "learning_rate": 1.707862585972171e-05, - "loss": 0.5404, - "step": 223440 - }, - { - "epoch": 1.9753708516770099, - "grad_norm": 1.6363284587860107, - "learning_rate": 1.7077152472049837e-05, - "loss": 0.5117, - "step": 223450 - }, - { - "epoch": 1.9754592549373222, - "grad_norm": 7.974064350128174, - "learning_rate": 1.7075679084377965e-05, - "loss": 0.6744, - "step": 223460 - }, - { - "epoch": 1.9755476581976343, - "grad_norm": 1.0796862840652466, - "learning_rate": 1.7074205696706094e-05, - "loss": 0.6018, - "step": 223470 - }, - { - "epoch": 1.9756360614579465, - "grad_norm": 1.667531967163086, - "learning_rate": 1.7072732309034225e-05, - "loss": 0.6464, - "step": 223480 - }, - { - "epoch": 1.9757244647182588, - "grad_norm": 2.247187376022339, - "learning_rate": 1.7071258921362354e-05, - "loss": 0.6127, - "step": 223490 - }, - { - "epoch": 1.9758128679785711, - "grad_norm": 1.4988588094711304, - "learning_rate": 1.7069785533690482e-05, - "loss": 0.5959, - "step": 223500 - }, - { - "epoch": 1.9759012712388833, - "grad_norm": 1.8883183002471924, - "learning_rate": 1.706831214601861e-05, - "loss": 0.5806, - "step": 223510 - }, - { - "epoch": 1.9759896744991954, - "grad_norm": 4.116348743438721, - "learning_rate": 1.7066838758346742e-05, - "loss": 0.5542, - "step": 223520 - }, - { - "epoch": 1.9760780777595077, - "grad_norm": 2.1979382038116455, - "learning_rate": 1.706536537067487e-05, - "loss": 0.427, - "step": 223530 - }, - { - "epoch": 1.97616648101982, - "grad_norm": 2.150432825088501, - "learning_rate": 1.7063891983003e-05, - "loss": 0.6589, - "step": 223540 - }, - { - "epoch": 1.9762548842801322, - "grad_norm": 1.7369264364242554, - "learning_rate": 1.706241859533113e-05, - "loss": 0.6432, - "step": 223550 - }, - { - "epoch": 1.9763432875404445, - "grad_norm": 5.763035297393799, - "learning_rate": 1.706094520765926e-05, - "loss": 0.5455, - "step": 223560 - }, - { - "epoch": 1.9764316908007569, - "grad_norm": 8.456995010375977, - "learning_rate": 1.7059471819987387e-05, - "loss": 0.617, - "step": 223570 - }, - { - "epoch": 1.976520094061069, - "grad_norm": 6.879094123840332, - "learning_rate": 1.7057998432315516e-05, - "loss": 0.5605, - "step": 223580 - }, - { - "epoch": 1.976608497321381, - "grad_norm": 3.256601572036743, - "learning_rate": 1.7056525044643648e-05, - "loss": 0.6778, - "step": 223590 - }, - { - "epoch": 1.9766969005816935, - "grad_norm": 1.143511176109314, - "learning_rate": 1.7055051656971776e-05, - "loss": 0.6732, - "step": 223600 - }, - { - "epoch": 1.9767853038420058, - "grad_norm": 1.5223276615142822, - "learning_rate": 1.7053578269299904e-05, - "loss": 0.5141, - "step": 223610 - }, - { - "epoch": 1.976873707102318, - "grad_norm": 2.047243595123291, - "learning_rate": 1.7052104881628036e-05, - "loss": 0.695, - "step": 223620 - }, - { - "epoch": 1.97696211036263, - "grad_norm": 1.709922432899475, - "learning_rate": 1.7050631493956164e-05, - "loss": 0.6434, - "step": 223630 - }, - { - "epoch": 1.9770505136229424, - "grad_norm": 1.0415716171264648, - "learning_rate": 1.7049158106284293e-05, - "loss": 0.4897, - "step": 223640 - }, - { - "epoch": 1.9771389168832547, - "grad_norm": 0.822580873966217, - "learning_rate": 1.704768471861242e-05, - "loss": 0.5427, - "step": 223650 - }, - { - "epoch": 1.9772273201435668, - "grad_norm": 2.1559183597564697, - "learning_rate": 1.7046211330940553e-05, - "loss": 0.5389, - "step": 223660 - }, - { - "epoch": 1.9773157234038792, - "grad_norm": 1.7369835376739502, - "learning_rate": 1.704473794326868e-05, - "loss": 0.6673, - "step": 223670 - }, - { - "epoch": 1.9774041266641915, - "grad_norm": 2.9662587642669678, - "learning_rate": 1.704326455559681e-05, - "loss": 0.6322, - "step": 223680 - }, - { - "epoch": 1.9774925299245036, - "grad_norm": 1.961627721786499, - "learning_rate": 1.7041791167924938e-05, - "loss": 0.5055, - "step": 223690 - }, - { - "epoch": 1.9775809331848158, - "grad_norm": 1.8878660202026367, - "learning_rate": 1.704031778025307e-05, - "loss": 0.6352, - "step": 223700 - }, - { - "epoch": 1.977669336445128, - "grad_norm": 1.2861078977584839, - "learning_rate": 1.7038844392581198e-05, - "loss": 0.5676, - "step": 223710 - }, - { - "epoch": 1.9777577397054404, - "grad_norm": 13.850279808044434, - "learning_rate": 1.7037371004909326e-05, - "loss": 0.6429, - "step": 223720 - }, - { - "epoch": 1.9778461429657526, - "grad_norm": 3.532600164413452, - "learning_rate": 1.7035897617237458e-05, - "loss": 0.6275, - "step": 223730 - }, - { - "epoch": 1.9779345462260647, - "grad_norm": 2.6759657859802246, - "learning_rate": 1.7034424229565586e-05, - "loss": 0.5844, - "step": 223740 - }, - { - "epoch": 1.978022949486377, - "grad_norm": 1.7044777870178223, - "learning_rate": 1.7032950841893715e-05, - "loss": 0.4891, - "step": 223750 - }, - { - "epoch": 1.9781113527466894, - "grad_norm": 9.880300521850586, - "learning_rate": 1.7031477454221843e-05, - "loss": 0.6351, - "step": 223760 - }, - { - "epoch": 1.9781997560070015, - "grad_norm": 7.09929895401001, - "learning_rate": 1.7030004066549975e-05, - "loss": 0.4399, - "step": 223770 - }, - { - "epoch": 1.9782881592673138, - "grad_norm": 3.8416786193847656, - "learning_rate": 1.7028530678878103e-05, - "loss": 0.5856, - "step": 223780 - }, - { - "epoch": 1.9783765625276262, - "grad_norm": 1.2900749444961548, - "learning_rate": 1.702705729120623e-05, - "loss": 0.4727, - "step": 223790 - }, - { - "epoch": 1.9784649657879383, - "grad_norm": 3.191159963607788, - "learning_rate": 1.7025583903534363e-05, - "loss": 0.7087, - "step": 223800 - }, - { - "epoch": 1.9785533690482504, - "grad_norm": 3.7237839698791504, - "learning_rate": 1.702411051586249e-05, - "loss": 0.7018, - "step": 223810 - }, - { - "epoch": 1.9786417723085628, - "grad_norm": 3.7309305667877197, - "learning_rate": 1.702263712819062e-05, - "loss": 0.5741, - "step": 223820 - }, - { - "epoch": 1.978730175568875, - "grad_norm": 2.9731409549713135, - "learning_rate": 1.7021163740518752e-05, - "loss": 0.6159, - "step": 223830 - }, - { - "epoch": 1.9788185788291872, - "grad_norm": 2.7989695072174072, - "learning_rate": 1.701969035284688e-05, - "loss": 0.5392, - "step": 223840 - }, - { - "epoch": 1.9789069820894993, - "grad_norm": 1.714094638824463, - "learning_rate": 1.701821696517501e-05, - "loss": 0.576, - "step": 223850 - }, - { - "epoch": 1.9789953853498117, - "grad_norm": 6.915410995483398, - "learning_rate": 1.701674357750314e-05, - "loss": 0.5674, - "step": 223860 - }, - { - "epoch": 1.979083788610124, - "grad_norm": 4.811580181121826, - "learning_rate": 1.701527018983127e-05, - "loss": 0.5876, - "step": 223870 - }, - { - "epoch": 1.9791721918704361, - "grad_norm": 2.657339096069336, - "learning_rate": 1.7013796802159397e-05, - "loss": 0.7162, - "step": 223880 - }, - { - "epoch": 1.9792605951307483, - "grad_norm": 4.997399806976318, - "learning_rate": 1.701232341448753e-05, - "loss": 0.6316, - "step": 223890 - }, - { - "epoch": 1.9793489983910608, - "grad_norm": 4.83448600769043, - "learning_rate": 1.7010850026815657e-05, - "loss": 0.6614, - "step": 223900 - }, - { - "epoch": 1.979437401651373, - "grad_norm": 1.3343744277954102, - "learning_rate": 1.7009376639143785e-05, - "loss": 0.5769, - "step": 223910 - }, - { - "epoch": 1.979525804911685, - "grad_norm": 2.2980024814605713, - "learning_rate": 1.7007903251471917e-05, - "loss": 0.6001, - "step": 223920 - }, - { - "epoch": 1.9796142081719974, - "grad_norm": 1.0228185653686523, - "learning_rate": 1.7006429863800045e-05, - "loss": 0.6659, - "step": 223930 - }, - { - "epoch": 1.9797026114323097, - "grad_norm": 1.5537577867507935, - "learning_rate": 1.7004956476128174e-05, - "loss": 0.7196, - "step": 223940 - }, - { - "epoch": 1.9797910146926219, - "grad_norm": 1.3778533935546875, - "learning_rate": 1.7003483088456306e-05, - "loss": 0.6119, - "step": 223950 - }, - { - "epoch": 1.979879417952934, - "grad_norm": 1.0338380336761475, - "learning_rate": 1.7002009700784434e-05, - "loss": 0.4922, - "step": 223960 - }, - { - "epoch": 1.9799678212132463, - "grad_norm": 9.00503921508789, - "learning_rate": 1.7000536313112562e-05, - "loss": 0.7009, - "step": 223970 - }, - { - "epoch": 1.9800562244735587, - "grad_norm": 3.3645524978637695, - "learning_rate": 1.6999062925440694e-05, - "loss": 0.554, - "step": 223980 - }, - { - "epoch": 1.9801446277338708, - "grad_norm": 1.2249828577041626, - "learning_rate": 1.6997589537768822e-05, - "loss": 0.4951, - "step": 223990 - }, - { - "epoch": 1.980233030994183, - "grad_norm": 2.8988823890686035, - "learning_rate": 1.699611615009695e-05, - "loss": 0.6621, - "step": 224000 - }, - { - "epoch": 1.9803214342544955, - "grad_norm": 1.5652815103530884, - "learning_rate": 1.699464276242508e-05, - "loss": 0.4766, - "step": 224010 - }, - { - "epoch": 1.9804098375148076, - "grad_norm": 3.9531476497650146, - "learning_rate": 1.699316937475321e-05, - "loss": 0.5626, - "step": 224020 - }, - { - "epoch": 1.9804982407751197, - "grad_norm": 2.6915416717529297, - "learning_rate": 1.699169598708134e-05, - "loss": 0.5738, - "step": 224030 - }, - { - "epoch": 1.980586644035432, - "grad_norm": 2.531986713409424, - "learning_rate": 1.6990222599409467e-05, - "loss": 0.5644, - "step": 224040 - }, - { - "epoch": 1.9806750472957444, - "grad_norm": 1.614586353302002, - "learning_rate": 1.6988749211737596e-05, - "loss": 0.5142, - "step": 224050 - }, - { - "epoch": 1.9807634505560565, - "grad_norm": 1.6339393854141235, - "learning_rate": 1.6987275824065728e-05, - "loss": 0.5071, - "step": 224060 - }, - { - "epoch": 1.9808518538163686, - "grad_norm": 1.582903504371643, - "learning_rate": 1.6985802436393856e-05, - "loss": 0.5189, - "step": 224070 - }, - { - "epoch": 1.980940257076681, - "grad_norm": 4.0491557121276855, - "learning_rate": 1.6984329048721984e-05, - "loss": 0.5312, - "step": 224080 - }, - { - "epoch": 1.9810286603369933, - "grad_norm": 1.9501909017562866, - "learning_rate": 1.6982855661050116e-05, - "loss": 0.6763, - "step": 224090 - }, - { - "epoch": 1.9811170635973054, - "grad_norm": 11.996707916259766, - "learning_rate": 1.6981382273378244e-05, - "loss": 0.6142, - "step": 224100 - }, - { - "epoch": 1.9812054668576176, - "grad_norm": 4.226593494415283, - "learning_rate": 1.6979908885706373e-05, - "loss": 0.5013, - "step": 224110 - }, - { - "epoch": 1.98129387011793, - "grad_norm": 2.621124029159546, - "learning_rate": 1.69784354980345e-05, - "loss": 0.6391, - "step": 224120 - }, - { - "epoch": 1.9813822733782422, - "grad_norm": 2.4573519229888916, - "learning_rate": 1.6976962110362633e-05, - "loss": 0.5746, - "step": 224130 - }, - { - "epoch": 1.9814706766385544, - "grad_norm": 11.391940116882324, - "learning_rate": 1.697548872269076e-05, - "loss": 0.7617, - "step": 224140 - }, - { - "epoch": 1.9815590798988667, - "grad_norm": 7.178558826446533, - "learning_rate": 1.697401533501889e-05, - "loss": 0.5036, - "step": 224150 - }, - { - "epoch": 1.981647483159179, - "grad_norm": 2.7364306449890137, - "learning_rate": 1.6972541947347018e-05, - "loss": 0.6303, - "step": 224160 - }, - { - "epoch": 1.9817358864194912, - "grad_norm": 10.925390243530273, - "learning_rate": 1.697106855967515e-05, - "loss": 0.5193, - "step": 224170 - }, - { - "epoch": 1.9818242896798033, - "grad_norm": 1.7822974920272827, - "learning_rate": 1.6969595172003278e-05, - "loss": 0.5683, - "step": 224180 - }, - { - "epoch": 1.9819126929401156, - "grad_norm": 15.627070426940918, - "learning_rate": 1.6968121784331406e-05, - "loss": 0.6209, - "step": 224190 - }, - { - "epoch": 1.982001096200428, - "grad_norm": 5.662389755249023, - "learning_rate": 1.6966648396659538e-05, - "loss": 0.5114, - "step": 224200 - }, - { - "epoch": 1.98208949946074, - "grad_norm": 1.9230458736419678, - "learning_rate": 1.6965175008987666e-05, - "loss": 0.4327, - "step": 224210 - }, - { - "epoch": 1.9821779027210522, - "grad_norm": 3.2107596397399902, - "learning_rate": 1.6963701621315795e-05, - "loss": 0.4802, - "step": 224220 - }, - { - "epoch": 1.9822663059813646, - "grad_norm": 3.1691925525665283, - "learning_rate": 1.6962228233643923e-05, - "loss": 0.5961, - "step": 224230 - }, - { - "epoch": 1.982354709241677, - "grad_norm": 1.4854533672332764, - "learning_rate": 1.6960754845972055e-05, - "loss": 0.5751, - "step": 224240 - }, - { - "epoch": 1.982443112501989, - "grad_norm": 1.9018033742904663, - "learning_rate": 1.6959281458300183e-05, - "loss": 0.6033, - "step": 224250 - }, - { - "epoch": 1.9825315157623014, - "grad_norm": 1.478623390197754, - "learning_rate": 1.695780807062831e-05, - "loss": 0.6022, - "step": 224260 - }, - { - "epoch": 1.9826199190226137, - "grad_norm": 3.3851511478424072, - "learning_rate": 1.6956334682956443e-05, - "loss": 0.6146, - "step": 224270 - }, - { - "epoch": 1.9827083222829258, - "grad_norm": 2.368901014328003, - "learning_rate": 1.6954861295284572e-05, - "loss": 0.5482, - "step": 224280 - }, - { - "epoch": 1.982796725543238, - "grad_norm": 2.6048879623413086, - "learning_rate": 1.69533879076127e-05, - "loss": 0.7511, - "step": 224290 - }, - { - "epoch": 1.9828851288035503, - "grad_norm": 1.4162763357162476, - "learning_rate": 1.695191451994083e-05, - "loss": 0.5106, - "step": 224300 - }, - { - "epoch": 1.9829735320638626, - "grad_norm": 6.880292892456055, - "learning_rate": 1.695044113226896e-05, - "loss": 0.5504, - "step": 224310 - }, - { - "epoch": 1.9830619353241747, - "grad_norm": 1.9633214473724365, - "learning_rate": 1.694896774459709e-05, - "loss": 0.3765, - "step": 224320 - }, - { - "epoch": 1.9831503385844869, - "grad_norm": 3.665835380554199, - "learning_rate": 1.6947494356925217e-05, - "loss": 0.5829, - "step": 224330 - }, - { - "epoch": 1.9832387418447992, - "grad_norm": 1.6488367319107056, - "learning_rate": 1.6946020969253345e-05, - "loss": 0.5372, - "step": 224340 - }, - { - "epoch": 1.9833271451051115, - "grad_norm": 1.4566655158996582, - "learning_rate": 1.6944547581581477e-05, - "loss": 0.6111, - "step": 224350 - }, - { - "epoch": 1.9834155483654237, - "grad_norm": 15.666956901550293, - "learning_rate": 1.6943074193909605e-05, - "loss": 0.7757, - "step": 224360 - }, - { - "epoch": 1.983503951625736, - "grad_norm": 4.194991111755371, - "learning_rate": 1.6941600806237734e-05, - "loss": 0.4925, - "step": 224370 - }, - { - "epoch": 1.9835923548860483, - "grad_norm": 8.081720352172852, - "learning_rate": 1.6940127418565865e-05, - "loss": 0.5839, - "step": 224380 - }, - { - "epoch": 1.9836807581463605, - "grad_norm": 8.042984008789062, - "learning_rate": 1.6938654030893994e-05, - "loss": 0.5748, - "step": 224390 - }, - { - "epoch": 1.9837691614066726, - "grad_norm": 2.2797820568084717, - "learning_rate": 1.6937180643222122e-05, - "loss": 0.5712, - "step": 224400 - }, - { - "epoch": 1.983857564666985, - "grad_norm": 3.472729444503784, - "learning_rate": 1.693570725555025e-05, - "loss": 0.5932, - "step": 224410 - }, - { - "epoch": 1.9839459679272973, - "grad_norm": 6.893070220947266, - "learning_rate": 1.6934233867878382e-05, - "loss": 0.5428, - "step": 224420 - }, - { - "epoch": 1.9840343711876094, - "grad_norm": 6.0669403076171875, - "learning_rate": 1.693276048020651e-05, - "loss": 0.5413, - "step": 224430 - }, - { - "epoch": 1.9841227744479215, - "grad_norm": 0.7792112231254578, - "learning_rate": 1.693128709253464e-05, - "loss": 0.4702, - "step": 224440 - }, - { - "epoch": 1.9842111777082339, - "grad_norm": 1.6176273822784424, - "learning_rate": 1.692981370486277e-05, - "loss": 0.6868, - "step": 224450 - }, - { - "epoch": 1.9842995809685462, - "grad_norm": 4.055951118469238, - "learning_rate": 1.69283403171909e-05, - "loss": 0.5389, - "step": 224460 - }, - { - "epoch": 1.9843879842288583, - "grad_norm": 2.720865249633789, - "learning_rate": 1.6926866929519027e-05, - "loss": 0.6477, - "step": 224470 - }, - { - "epoch": 1.9844763874891704, - "grad_norm": 1.8405334949493408, - "learning_rate": 1.6925393541847156e-05, - "loss": 0.5865, - "step": 224480 - }, - { - "epoch": 1.984564790749483, - "grad_norm": 1.7914783954620361, - "learning_rate": 1.6923920154175287e-05, - "loss": 0.5495, - "step": 224490 - }, - { - "epoch": 1.9846531940097951, - "grad_norm": 3.9661803245544434, - "learning_rate": 1.6922446766503416e-05, - "loss": 0.4685, - "step": 224500 - }, - { - "epoch": 1.9847415972701072, - "grad_norm": 5.78141975402832, - "learning_rate": 1.6920973378831544e-05, - "loss": 0.7583, - "step": 224510 - }, - { - "epoch": 1.9848300005304196, - "grad_norm": 2.5443613529205322, - "learning_rate": 1.6919499991159673e-05, - "loss": 0.717, - "step": 224520 - }, - { - "epoch": 1.984918403790732, - "grad_norm": 1.7132166624069214, - "learning_rate": 1.6918026603487804e-05, - "loss": 0.5181, - "step": 224530 - }, - { - "epoch": 1.985006807051044, - "grad_norm": 3.477186441421509, - "learning_rate": 1.6916553215815933e-05, - "loss": 0.5612, - "step": 224540 - }, - { - "epoch": 1.9850952103113562, - "grad_norm": 1.499049186706543, - "learning_rate": 1.691507982814406e-05, - "loss": 0.5493, - "step": 224550 - }, - { - "epoch": 1.9851836135716685, - "grad_norm": 1.847678303718567, - "learning_rate": 1.6913606440472193e-05, - "loss": 0.6777, - "step": 224560 - }, - { - "epoch": 1.9852720168319808, - "grad_norm": 3.1537466049194336, - "learning_rate": 1.691213305280032e-05, - "loss": 0.4916, - "step": 224570 - }, - { - "epoch": 1.985360420092293, - "grad_norm": 3.695202350616455, - "learning_rate": 1.691065966512845e-05, - "loss": 0.5396, - "step": 224580 - }, - { - "epoch": 1.985448823352605, - "grad_norm": 7.094969749450684, - "learning_rate": 1.6909186277456578e-05, - "loss": 0.5074, - "step": 224590 - }, - { - "epoch": 1.9855372266129176, - "grad_norm": 2.075542688369751, - "learning_rate": 1.690771288978471e-05, - "loss": 0.5758, - "step": 224600 - }, - { - "epoch": 1.9856256298732298, - "grad_norm": 1.3143478631973267, - "learning_rate": 1.6906239502112838e-05, - "loss": 0.5097, - "step": 224610 - }, - { - "epoch": 1.985714033133542, - "grad_norm": 3.3322737216949463, - "learning_rate": 1.6904766114440966e-05, - "loss": 0.645, - "step": 224620 - }, - { - "epoch": 1.9858024363938542, - "grad_norm": 1.4347734451293945, - "learning_rate": 1.6903292726769095e-05, - "loss": 0.5514, - "step": 224630 - }, - { - "epoch": 1.9858908396541666, - "grad_norm": 3.8193883895874023, - "learning_rate": 1.6901819339097226e-05, - "loss": 0.6198, - "step": 224640 - }, - { - "epoch": 1.9859792429144787, - "grad_norm": 1.811673641204834, - "learning_rate": 1.6900345951425355e-05, - "loss": 0.5617, - "step": 224650 - }, - { - "epoch": 1.9860676461747908, - "grad_norm": 2.457360029220581, - "learning_rate": 1.6898872563753483e-05, - "loss": 0.5595, - "step": 224660 - }, - { - "epoch": 1.9861560494351032, - "grad_norm": 4.07906436920166, - "learning_rate": 1.6897399176081615e-05, - "loss": 0.5444, - "step": 224670 - }, - { - "epoch": 1.9862444526954155, - "grad_norm": 4.393835067749023, - "learning_rate": 1.6895925788409743e-05, - "loss": 0.6519, - "step": 224680 - }, - { - "epoch": 1.9863328559557276, - "grad_norm": 3.4482831954956055, - "learning_rate": 1.689445240073787e-05, - "loss": 0.502, - "step": 224690 - }, - { - "epoch": 1.9864212592160397, - "grad_norm": 1.3472683429718018, - "learning_rate": 1.6892979013066e-05, - "loss": 0.5358, - "step": 224700 - }, - { - "epoch": 1.986509662476352, - "grad_norm": 1.7818764448165894, - "learning_rate": 1.689150562539413e-05, - "loss": 0.4806, - "step": 224710 - }, - { - "epoch": 1.9865980657366644, - "grad_norm": 2.1879148483276367, - "learning_rate": 1.689003223772226e-05, - "loss": 0.5691, - "step": 224720 - }, - { - "epoch": 1.9866864689969765, - "grad_norm": 2.7464609146118164, - "learning_rate": 1.6888558850050388e-05, - "loss": 0.5308, - "step": 224730 - }, - { - "epoch": 1.9867748722572889, - "grad_norm": 2.4121510982513428, - "learning_rate": 1.688708546237852e-05, - "loss": 0.577, - "step": 224740 - }, - { - "epoch": 1.9868632755176012, - "grad_norm": 1.6558781862258911, - "learning_rate": 1.688561207470665e-05, - "loss": 0.6469, - "step": 224750 - }, - { - "epoch": 1.9869516787779133, - "grad_norm": 9.172469139099121, - "learning_rate": 1.6884138687034777e-05, - "loss": 0.5378, - "step": 224760 - }, - { - "epoch": 1.9870400820382255, - "grad_norm": 1.5967637300491333, - "learning_rate": 1.688266529936291e-05, - "loss": 0.6133, - "step": 224770 - }, - { - "epoch": 1.9871284852985378, - "grad_norm": 1.2075780630111694, - "learning_rate": 1.6881191911691037e-05, - "loss": 0.5244, - "step": 224780 - }, - { - "epoch": 1.9872168885588501, - "grad_norm": 0.8148615956306458, - "learning_rate": 1.6879718524019165e-05, - "loss": 0.628, - "step": 224790 - }, - { - "epoch": 1.9873052918191623, - "grad_norm": 1.3674981594085693, - "learning_rate": 1.6878245136347297e-05, - "loss": 0.5519, - "step": 224800 - }, - { - "epoch": 1.9873936950794744, - "grad_norm": 0.957788348197937, - "learning_rate": 1.6876771748675425e-05, - "loss": 0.5269, - "step": 224810 - }, - { - "epoch": 1.9874820983397867, - "grad_norm": 3.036616086959839, - "learning_rate": 1.6875298361003554e-05, - "loss": 0.5956, - "step": 224820 - }, - { - "epoch": 1.987570501600099, - "grad_norm": 1.987912893295288, - "learning_rate": 1.6873824973331685e-05, - "loss": 0.5781, - "step": 224830 - }, - { - "epoch": 1.9876589048604112, - "grad_norm": 6.283286094665527, - "learning_rate": 1.6872351585659814e-05, - "loss": 0.5509, - "step": 224840 - }, - { - "epoch": 1.9877473081207235, - "grad_norm": 1.9108779430389404, - "learning_rate": 1.6870878197987945e-05, - "loss": 0.6328, - "step": 224850 - }, - { - "epoch": 1.9878357113810359, - "grad_norm": 1.2929589748382568, - "learning_rate": 1.6869404810316074e-05, - "loss": 0.482, - "step": 224860 - }, - { - "epoch": 1.987924114641348, - "grad_norm": 4.726243495941162, - "learning_rate": 1.6867931422644202e-05, - "loss": 0.5229, - "step": 224870 - }, - { - "epoch": 1.9880125179016601, - "grad_norm": 1.9832627773284912, - "learning_rate": 1.686645803497233e-05, - "loss": 0.6598, - "step": 224880 - }, - { - "epoch": 1.9881009211619725, - "grad_norm": 1.8016328811645508, - "learning_rate": 1.6864984647300462e-05, - "loss": 0.5763, - "step": 224890 - }, - { - "epoch": 1.9881893244222848, - "grad_norm": 1.620561122894287, - "learning_rate": 1.686351125962859e-05, - "loss": 0.4979, - "step": 224900 - }, - { - "epoch": 1.988277727682597, - "grad_norm": 4.599083423614502, - "learning_rate": 1.686203787195672e-05, - "loss": 0.5075, - "step": 224910 - }, - { - "epoch": 1.988366130942909, - "grad_norm": 3.026254415512085, - "learning_rate": 1.686056448428485e-05, - "loss": 0.6115, - "step": 224920 - }, - { - "epoch": 1.9884545342032214, - "grad_norm": 1.3611140251159668, - "learning_rate": 1.685909109661298e-05, - "loss": 0.5479, - "step": 224930 - }, - { - "epoch": 1.9885429374635337, - "grad_norm": 7.72886323928833, - "learning_rate": 1.6857617708941107e-05, - "loss": 0.4639, - "step": 224940 - }, - { - "epoch": 1.9886313407238458, - "grad_norm": 4.344997406005859, - "learning_rate": 1.6856144321269236e-05, - "loss": 0.633, - "step": 224950 - }, - { - "epoch": 1.9887197439841582, - "grad_norm": 4.46109676361084, - "learning_rate": 1.6854670933597368e-05, - "loss": 0.6392, - "step": 224960 - }, - { - "epoch": 1.9888081472444705, - "grad_norm": 11.079893112182617, - "learning_rate": 1.6853197545925496e-05, - "loss": 0.6176, - "step": 224970 - }, - { - "epoch": 1.9888965505047826, - "grad_norm": 5.218672752380371, - "learning_rate": 1.6851724158253624e-05, - "loss": 0.5178, - "step": 224980 - }, - { - "epoch": 1.9889849537650948, - "grad_norm": 2.4219586849212646, - "learning_rate": 1.6850250770581753e-05, - "loss": 0.5192, - "step": 224990 - }, - { - "epoch": 1.989073357025407, - "grad_norm": 4.831282615661621, - "learning_rate": 1.6848777382909884e-05, - "loss": 0.5875, - "step": 225000 - }, - { - "epoch": 1.9891617602857194, - "grad_norm": 2.6583251953125, - "learning_rate": 1.6847303995238013e-05, - "loss": 0.5748, - "step": 225010 - }, - { - "epoch": 1.9892501635460316, - "grad_norm": 1.1268571615219116, - "learning_rate": 1.684583060756614e-05, - "loss": 0.5148, - "step": 225020 - }, - { - "epoch": 1.9893385668063437, - "grad_norm": 3.306156635284424, - "learning_rate": 1.6844357219894273e-05, - "loss": 0.6196, - "step": 225030 - }, - { - "epoch": 1.989426970066656, - "grad_norm": 1.5640621185302734, - "learning_rate": 1.68428838322224e-05, - "loss": 0.5988, - "step": 225040 - }, - { - "epoch": 1.9895153733269684, - "grad_norm": 2.0384891033172607, - "learning_rate": 1.684141044455053e-05, - "loss": 0.7041, - "step": 225050 - }, - { - "epoch": 1.9896037765872805, - "grad_norm": 2.0110831260681152, - "learning_rate": 1.6839937056878658e-05, - "loss": 0.6548, - "step": 225060 - }, - { - "epoch": 1.9896921798475928, - "grad_norm": 2.0337533950805664, - "learning_rate": 1.683846366920679e-05, - "loss": 0.6102, - "step": 225070 - }, - { - "epoch": 1.9897805831079052, - "grad_norm": 1.8111612796783447, - "learning_rate": 1.6836990281534918e-05, - "loss": 0.4591, - "step": 225080 - }, - { - "epoch": 1.9898689863682173, - "grad_norm": 1.1000893115997314, - "learning_rate": 1.6835516893863046e-05, - "loss": 0.5735, - "step": 225090 - }, - { - "epoch": 1.9899573896285294, - "grad_norm": 1.8954603672027588, - "learning_rate": 1.6834043506191175e-05, - "loss": 0.5119, - "step": 225100 - }, - { - "epoch": 1.9900457928888418, - "grad_norm": 1.3520907163619995, - "learning_rate": 1.6832570118519306e-05, - "loss": 0.6305, - "step": 225110 - }, - { - "epoch": 1.990134196149154, - "grad_norm": 5.1060967445373535, - "learning_rate": 1.6831096730847435e-05, - "loss": 0.6116, - "step": 225120 - }, - { - "epoch": 1.9902225994094662, - "grad_norm": 3.5641541481018066, - "learning_rate": 1.6829623343175563e-05, - "loss": 0.4878, - "step": 225130 - }, - { - "epoch": 1.9903110026697783, - "grad_norm": 5.637264728546143, - "learning_rate": 1.6828149955503695e-05, - "loss": 0.5807, - "step": 225140 - }, - { - "epoch": 1.9903994059300907, - "grad_norm": 3.1112334728240967, - "learning_rate": 1.6826676567831823e-05, - "loss": 0.6752, - "step": 225150 - }, - { - "epoch": 1.990487809190403, - "grad_norm": 4.424471855163574, - "learning_rate": 1.682520318015995e-05, - "loss": 0.4787, - "step": 225160 - }, - { - "epoch": 1.9905762124507151, - "grad_norm": 1.129172921180725, - "learning_rate": 1.682372979248808e-05, - "loss": 0.5729, - "step": 225170 - }, - { - "epoch": 1.9906646157110273, - "grad_norm": 0.8466700911521912, - "learning_rate": 1.682225640481621e-05, - "loss": 0.5369, - "step": 225180 - }, - { - "epoch": 1.9907530189713398, - "grad_norm": 2.4012086391448975, - "learning_rate": 1.682078301714434e-05, - "loss": 0.6113, - "step": 225190 - }, - { - "epoch": 1.990841422231652, - "grad_norm": 3.801825761795044, - "learning_rate": 1.681930962947247e-05, - "loss": 0.6716, - "step": 225200 - }, - { - "epoch": 1.990929825491964, - "grad_norm": 4.74006462097168, - "learning_rate": 1.68178362418006e-05, - "loss": 0.7159, - "step": 225210 - }, - { - "epoch": 1.9910182287522764, - "grad_norm": 2.6332221031188965, - "learning_rate": 1.681636285412873e-05, - "loss": 0.4632, - "step": 225220 - }, - { - "epoch": 1.9911066320125888, - "grad_norm": 1.048154592514038, - "learning_rate": 1.6814889466456857e-05, - "loss": 0.4734, - "step": 225230 - }, - { - "epoch": 1.9911950352729009, - "grad_norm": 13.232263565063477, - "learning_rate": 1.6813416078784985e-05, - "loss": 0.4885, - "step": 225240 - }, - { - "epoch": 1.991283438533213, - "grad_norm": 0.8549413084983826, - "learning_rate": 1.6811942691113117e-05, - "loss": 0.541, - "step": 225250 - }, - { - "epoch": 1.9913718417935253, - "grad_norm": 4.279587268829346, - "learning_rate": 1.6810469303441245e-05, - "loss": 0.6136, - "step": 225260 - }, - { - "epoch": 1.9914602450538377, - "grad_norm": 4.3734893798828125, - "learning_rate": 1.6808995915769374e-05, - "loss": 0.6915, - "step": 225270 - }, - { - "epoch": 1.9915486483141498, - "grad_norm": 0.9106436371803284, - "learning_rate": 1.6807522528097502e-05, - "loss": 0.5335, - "step": 225280 - }, - { - "epoch": 1.991637051574462, - "grad_norm": 1.1319162845611572, - "learning_rate": 1.6806049140425634e-05, - "loss": 0.5117, - "step": 225290 - }, - { - "epoch": 1.9917254548347743, - "grad_norm": 10.555523872375488, - "learning_rate": 1.6804575752753762e-05, - "loss": 0.58, - "step": 225300 - }, - { - "epoch": 1.9918138580950866, - "grad_norm": 5.483819961547852, - "learning_rate": 1.680310236508189e-05, - "loss": 0.5905, - "step": 225310 - }, - { - "epoch": 1.9919022613553987, - "grad_norm": 6.8603315353393555, - "learning_rate": 1.6801628977410022e-05, - "loss": 0.6074, - "step": 225320 - }, - { - "epoch": 1.991990664615711, - "grad_norm": 1.3294624090194702, - "learning_rate": 1.680015558973815e-05, - "loss": 0.5694, - "step": 225330 - }, - { - "epoch": 1.9920790678760234, - "grad_norm": 12.74087142944336, - "learning_rate": 1.679868220206628e-05, - "loss": 0.7074, - "step": 225340 - }, - { - "epoch": 1.9921674711363355, - "grad_norm": 1.852597713470459, - "learning_rate": 1.6797208814394407e-05, - "loss": 0.5257, - "step": 225350 - }, - { - "epoch": 1.9922558743966476, - "grad_norm": 11.889374732971191, - "learning_rate": 1.679573542672254e-05, - "loss": 0.4863, - "step": 225360 - }, - { - "epoch": 1.99234427765696, - "grad_norm": 2.9477877616882324, - "learning_rate": 1.6794262039050667e-05, - "loss": 0.4494, - "step": 225370 - }, - { - "epoch": 1.9924326809172723, - "grad_norm": 10.925512313842773, - "learning_rate": 1.6792788651378796e-05, - "loss": 0.6253, - "step": 225380 - }, - { - "epoch": 1.9925210841775844, - "grad_norm": 5.6802449226379395, - "learning_rate": 1.6791315263706927e-05, - "loss": 0.6502, - "step": 225390 - }, - { - "epoch": 1.9926094874378966, - "grad_norm": 1.6207715272903442, - "learning_rate": 1.6789841876035056e-05, - "loss": 0.5674, - "step": 225400 - }, - { - "epoch": 1.992697890698209, - "grad_norm": 18.84044075012207, - "learning_rate": 1.6788368488363184e-05, - "loss": 0.4871, - "step": 225410 - }, - { - "epoch": 1.9927862939585212, - "grad_norm": 1.179420828819275, - "learning_rate": 1.6786895100691312e-05, - "loss": 0.6922, - "step": 225420 - }, - { - "epoch": 1.9928746972188334, - "grad_norm": 0.961736261844635, - "learning_rate": 1.6785421713019444e-05, - "loss": 0.5083, - "step": 225430 - }, - { - "epoch": 1.9929631004791457, - "grad_norm": 5.554663181304932, - "learning_rate": 1.6783948325347573e-05, - "loss": 0.729, - "step": 225440 - }, - { - "epoch": 1.993051503739458, - "grad_norm": 8.339944839477539, - "learning_rate": 1.67824749376757e-05, - "loss": 0.5474, - "step": 225450 - }, - { - "epoch": 1.9931399069997702, - "grad_norm": 4.111626148223877, - "learning_rate": 1.678100155000383e-05, - "loss": 0.5849, - "step": 225460 - }, - { - "epoch": 1.9932283102600823, - "grad_norm": 2.2576897144317627, - "learning_rate": 1.677952816233196e-05, - "loss": 0.4451, - "step": 225470 - }, - { - "epoch": 1.9933167135203946, - "grad_norm": 4.997096061706543, - "learning_rate": 1.677805477466009e-05, - "loss": 0.604, - "step": 225480 - }, - { - "epoch": 1.993405116780707, - "grad_norm": 2.46445369720459, - "learning_rate": 1.6776581386988218e-05, - "loss": 0.7684, - "step": 225490 - }, - { - "epoch": 1.993493520041019, - "grad_norm": 2.6454429626464844, - "learning_rate": 1.677510799931635e-05, - "loss": 0.6637, - "step": 225500 - }, - { - "epoch": 1.9935819233013312, - "grad_norm": 4.765692234039307, - "learning_rate": 1.6773634611644478e-05, - "loss": 0.7253, - "step": 225510 - }, - { - "epoch": 1.9936703265616436, - "grad_norm": 1.414405107498169, - "learning_rate": 1.6772161223972606e-05, - "loss": 0.4885, - "step": 225520 - }, - { - "epoch": 1.993758729821956, - "grad_norm": 5.145718574523926, - "learning_rate": 1.6770687836300735e-05, - "loss": 0.6276, - "step": 225530 - }, - { - "epoch": 1.993847133082268, - "grad_norm": 2.5033962726593018, - "learning_rate": 1.6769214448628866e-05, - "loss": 0.5412, - "step": 225540 - }, - { - "epoch": 1.9939355363425804, - "grad_norm": 2.069969654083252, - "learning_rate": 1.6767741060956995e-05, - "loss": 0.7213, - "step": 225550 - }, - { - "epoch": 1.9940239396028927, - "grad_norm": 4.3891777992248535, - "learning_rate": 1.6766267673285123e-05, - "loss": 0.5545, - "step": 225560 - }, - { - "epoch": 1.9941123428632048, - "grad_norm": 1.4024473428726196, - "learning_rate": 1.6764794285613255e-05, - "loss": 0.6344, - "step": 225570 - }, - { - "epoch": 1.994200746123517, - "grad_norm": 3.2352919578552246, - "learning_rate": 1.6763320897941383e-05, - "loss": 0.5767, - "step": 225580 - }, - { - "epoch": 1.9942891493838293, - "grad_norm": 2.1905648708343506, - "learning_rate": 1.676184751026951e-05, - "loss": 0.5415, - "step": 225590 - }, - { - "epoch": 1.9943775526441416, - "grad_norm": 5.467777729034424, - "learning_rate": 1.676037412259764e-05, - "loss": 0.5505, - "step": 225600 - }, - { - "epoch": 1.9944659559044537, - "grad_norm": 5.634042739868164, - "learning_rate": 1.675890073492577e-05, - "loss": 0.7058, - "step": 225610 - }, - { - "epoch": 1.9945543591647659, - "grad_norm": 23.753089904785156, - "learning_rate": 1.67574273472539e-05, - "loss": 0.7105, - "step": 225620 - }, - { - "epoch": 1.9946427624250782, - "grad_norm": 2.8161308765411377, - "learning_rate": 1.6755953959582028e-05, - "loss": 0.5731, - "step": 225630 - }, - { - "epoch": 1.9947311656853906, - "grad_norm": 12.272192001342773, - "learning_rate": 1.675448057191016e-05, - "loss": 0.6786, - "step": 225640 - }, - { - "epoch": 1.9948195689457027, - "grad_norm": 2.276580572128296, - "learning_rate": 1.675300718423829e-05, - "loss": 0.6314, - "step": 225650 - }, - { - "epoch": 1.994907972206015, - "grad_norm": 1.5143632888793945, - "learning_rate": 1.6751533796566417e-05, - "loss": 0.6322, - "step": 225660 - }, - { - "epoch": 1.9949963754663274, - "grad_norm": 5.343900203704834, - "learning_rate": 1.675006040889455e-05, - "loss": 0.5836, - "step": 225670 - }, - { - "epoch": 1.9950847787266395, - "grad_norm": 3.547680139541626, - "learning_rate": 1.6748587021222677e-05, - "loss": 0.595, - "step": 225680 - }, - { - "epoch": 1.9951731819869516, - "grad_norm": 1.290130376815796, - "learning_rate": 1.6747113633550805e-05, - "loss": 0.5369, - "step": 225690 - }, - { - "epoch": 1.995261585247264, - "grad_norm": 1.1940443515777588, - "learning_rate": 1.6745640245878937e-05, - "loss": 0.591, - "step": 225700 - }, - { - "epoch": 1.9953499885075763, - "grad_norm": 4.946343898773193, - "learning_rate": 1.6744166858207065e-05, - "loss": 0.5971, - "step": 225710 - }, - { - "epoch": 1.9954383917678884, - "grad_norm": 11.978418350219727, - "learning_rate": 1.6742693470535194e-05, - "loss": 0.5623, - "step": 225720 - }, - { - "epoch": 1.9955267950282005, - "grad_norm": 1.3770660161972046, - "learning_rate": 1.6741220082863325e-05, - "loss": 0.6232, - "step": 225730 - }, - { - "epoch": 1.9956151982885129, - "grad_norm": 1.5341140031814575, - "learning_rate": 1.6739746695191454e-05, - "loss": 0.6196, - "step": 225740 - }, - { - "epoch": 1.9957036015488252, - "grad_norm": 1.2397600412368774, - "learning_rate": 1.6738273307519582e-05, - "loss": 0.4289, - "step": 225750 - }, - { - "epoch": 1.9957920048091373, - "grad_norm": 2.2048261165618896, - "learning_rate": 1.6736799919847714e-05, - "loss": 0.6551, - "step": 225760 - }, - { - "epoch": 1.9958804080694494, - "grad_norm": 2.451563835144043, - "learning_rate": 1.6735326532175842e-05, - "loss": 0.6384, - "step": 225770 - }, - { - "epoch": 1.995968811329762, - "grad_norm": 1.699855089187622, - "learning_rate": 1.673385314450397e-05, - "loss": 0.4734, - "step": 225780 - }, - { - "epoch": 1.9960572145900741, - "grad_norm": 3.1928694248199463, - "learning_rate": 1.6732379756832102e-05, - "loss": 0.567, - "step": 225790 - }, - { - "epoch": 1.9961456178503862, - "grad_norm": 2.2834367752075195, - "learning_rate": 1.673090636916023e-05, - "loss": 0.4598, - "step": 225800 - }, - { - "epoch": 1.9962340211106986, - "grad_norm": 1.8125332593917847, - "learning_rate": 1.672943298148836e-05, - "loss": 0.5784, - "step": 225810 - }, - { - "epoch": 1.996322424371011, - "grad_norm": 1.8841636180877686, - "learning_rate": 1.6727959593816487e-05, - "loss": 0.6716, - "step": 225820 - }, - { - "epoch": 1.996410827631323, - "grad_norm": 1.5954008102416992, - "learning_rate": 1.672648620614462e-05, - "loss": 0.5947, - "step": 225830 - }, - { - "epoch": 1.9964992308916352, - "grad_norm": 2.470388174057007, - "learning_rate": 1.6725012818472747e-05, - "loss": 0.6013, - "step": 225840 - }, - { - "epoch": 1.9965876341519475, - "grad_norm": 0.8296368718147278, - "learning_rate": 1.6723539430800876e-05, - "loss": 0.6322, - "step": 225850 - }, - { - "epoch": 1.9966760374122599, - "grad_norm": 2.4908385276794434, - "learning_rate": 1.6722066043129007e-05, - "loss": 0.6579, - "step": 225860 - }, - { - "epoch": 1.996764440672572, - "grad_norm": 3.1366007328033447, - "learning_rate": 1.6720592655457136e-05, - "loss": 0.6049, - "step": 225870 - }, - { - "epoch": 1.996852843932884, - "grad_norm": 2.1875829696655273, - "learning_rate": 1.6719119267785264e-05, - "loss": 0.5508, - "step": 225880 - }, - { - "epoch": 1.9969412471931964, - "grad_norm": 2.4100494384765625, - "learning_rate": 1.6717645880113393e-05, - "loss": 0.4558, - "step": 225890 - }, - { - "epoch": 1.9970296504535088, - "grad_norm": 2.559838056564331, - "learning_rate": 1.6716172492441524e-05, - "loss": 0.591, - "step": 225900 - }, - { - "epoch": 1.997118053713821, - "grad_norm": 1.209906816482544, - "learning_rate": 1.6714699104769653e-05, - "loss": 0.5157, - "step": 225910 - }, - { - "epoch": 1.9972064569741332, - "grad_norm": 4.076237678527832, - "learning_rate": 1.671322571709778e-05, - "loss": 0.5812, - "step": 225920 - }, - { - "epoch": 1.9972948602344456, - "grad_norm": 13.479727745056152, - "learning_rate": 1.671175232942591e-05, - "loss": 0.5305, - "step": 225930 - }, - { - "epoch": 1.9973832634947577, - "grad_norm": 2.303053617477417, - "learning_rate": 1.671027894175404e-05, - "loss": 0.5326, - "step": 225940 - }, - { - "epoch": 1.9974716667550698, - "grad_norm": 5.860489368438721, - "learning_rate": 1.670880555408217e-05, - "loss": 0.5122, - "step": 225950 - }, - { - "epoch": 1.9975600700153822, - "grad_norm": 8.860002517700195, - "learning_rate": 1.6707332166410298e-05, - "loss": 0.5731, - "step": 225960 - }, - { - "epoch": 1.9976484732756945, - "grad_norm": 1.4590798616409302, - "learning_rate": 1.670585877873843e-05, - "loss": 0.5888, - "step": 225970 - }, - { - "epoch": 1.9977368765360066, - "grad_norm": 7.0802388191223145, - "learning_rate": 1.6704385391066558e-05, - "loss": 0.6654, - "step": 225980 - }, - { - "epoch": 1.9978252797963187, - "grad_norm": 3.268148422241211, - "learning_rate": 1.6702912003394686e-05, - "loss": 0.6022, - "step": 225990 - }, - { - "epoch": 1.997913683056631, - "grad_norm": 6.230152606964111, - "learning_rate": 1.6701438615722815e-05, - "loss": 0.5004, - "step": 226000 - }, - { - "epoch": 1.9980020863169434, - "grad_norm": 2.543973445892334, - "learning_rate": 1.6699965228050946e-05, - "loss": 0.5898, - "step": 226010 - }, - { - "epoch": 1.9980904895772555, - "grad_norm": 10.80360221862793, - "learning_rate": 1.6698491840379075e-05, - "loss": 0.6368, - "step": 226020 - }, - { - "epoch": 1.9981788928375679, - "grad_norm": 1.3303838968276978, - "learning_rate": 1.6697018452707203e-05, - "loss": 0.6188, - "step": 226030 - }, - { - "epoch": 1.9982672960978802, - "grad_norm": 1.1822746992111206, - "learning_rate": 1.6695545065035335e-05, - "loss": 0.5125, - "step": 226040 - }, - { - "epoch": 1.9983556993581923, - "grad_norm": 3.7949411869049072, - "learning_rate": 1.6694071677363463e-05, - "loss": 0.5073, - "step": 226050 - }, - { - "epoch": 1.9984441026185045, - "grad_norm": 0.9324376583099365, - "learning_rate": 1.669259828969159e-05, - "loss": 0.5405, - "step": 226060 - }, - { - "epoch": 1.9985325058788168, - "grad_norm": 2.840153217315674, - "learning_rate": 1.669112490201972e-05, - "loss": 0.496, - "step": 226070 - }, - { - "epoch": 1.9986209091391292, - "grad_norm": 2.08803653717041, - "learning_rate": 1.668965151434785e-05, - "loss": 0.5232, - "step": 226080 - }, - { - "epoch": 1.9987093123994413, - "grad_norm": 2.0550999641418457, - "learning_rate": 1.668817812667598e-05, - "loss": 0.4621, - "step": 226090 - }, - { - "epoch": 1.9987977156597534, - "grad_norm": 1.2500879764556885, - "learning_rate": 1.6686704739004108e-05, - "loss": 0.5219, - "step": 226100 - }, - { - "epoch": 1.9988861189200657, - "grad_norm": 1.7983343601226807, - "learning_rate": 1.6685231351332237e-05, - "loss": 0.514, - "step": 226110 - }, - { - "epoch": 1.998974522180378, - "grad_norm": 4.482465744018555, - "learning_rate": 1.668375796366037e-05, - "loss": 0.569, - "step": 226120 - }, - { - "epoch": 1.9990629254406902, - "grad_norm": 4.159267425537109, - "learning_rate": 1.6682284575988497e-05, - "loss": 0.67, - "step": 226130 - }, - { - "epoch": 1.9991513287010025, - "grad_norm": 1.8467906713485718, - "learning_rate": 1.6680811188316625e-05, - "loss": 0.601, - "step": 226140 - }, - { - "epoch": 1.9992397319613149, - "grad_norm": 6.043004989624023, - "learning_rate": 1.6679337800644757e-05, - "loss": 0.4399, - "step": 226150 - }, - { - "epoch": 1.999328135221627, - "grad_norm": 2.3459484577178955, - "learning_rate": 1.6677864412972885e-05, - "loss": 0.645, - "step": 226160 - }, - { - "epoch": 1.9994165384819391, - "grad_norm": 2.997763156890869, - "learning_rate": 1.6676391025301014e-05, - "loss": 0.5968, - "step": 226170 - }, - { - "epoch": 1.9995049417422515, - "grad_norm": 19.69053077697754, - "learning_rate": 1.6674917637629142e-05, - "loss": 0.6163, - "step": 226180 - }, - { - "epoch": 1.9995933450025638, - "grad_norm": 2.2578210830688477, - "learning_rate": 1.6673444249957274e-05, - "loss": 0.5128, - "step": 226190 - }, - { - "epoch": 1.999681748262876, - "grad_norm": 1.19893479347229, - "learning_rate": 1.6671970862285402e-05, - "loss": 0.6074, - "step": 226200 - }, - { - "epoch": 1.999770151523188, - "grad_norm": 1.4854111671447754, - "learning_rate": 1.667049747461353e-05, - "loss": 0.6433, - "step": 226210 - }, - { - "epoch": 1.9998585547835004, - "grad_norm": 1.1448254585266113, - "learning_rate": 1.666902408694166e-05, - "loss": 0.6176, - "step": 226220 - }, - { - "epoch": 1.9999469580438127, - "grad_norm": 5.088480472564697, - "learning_rate": 1.666755069926979e-05, - "loss": 0.6296, - "step": 226230 - }, - { - "epoch": 2.0, - "eval_loss": 0.5896387696266174, - "eval_runtime": 1557.2969, - "eval_samples_per_second": 290.548, - "eval_steps_per_second": 18.16, - "step": 226236 - } - ], - "logging_steps": 10, - "max_steps": 339354, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3.083718185776128e+17, - "train_batch_size": 16, - "trial_name": null, - "trial_params": null -}